empresas_generator.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import os

try:
    # Python 2
    from urllib2 import urlopen
except ImportError:
    # Python 3
    from urllib.request import urlopen

from slugify import slugify

# Directory where the generated JSON files are written
PAGE_PATH = 'content/empresas/'

# Source file consumed to generate the JSON files
EMPRESAS_FILE = 'https://raw.githubusercontent.com/pythonbrasil/pyBusinesses-BR/master/README.md'
EMPRESAS_LOGO_PATH = 'https://raw.githubusercontent.com/pythonbrasil/pyBusinesses-BR/master/'
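
# The source README is assumed to follow this Markdown layout (inferred from
# the parsing logic below; the actual repository layout may differ slightly):
#
#   ## Região (region)
#   ### Estado (state)
#   #### Cidade (city)
#   ![logo](path/to/logo.png) | Company Name | [site](https://example.com)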


def scrapping_empresas():
    """Parse the companies README and return a list of company dicts."""
    response = urlopen(EMPRESAS_FILE)
    content = response.read().decode()

    region = state = city = ''
    empresas = []

    for line in content.split('\n'):
        # Headings carry the location context for the rows that follow.
        if line.startswith('## '):
            region = line[2:].strip()
        elif line.startswith('### '):
            state = line[3:].strip()
        elif line.startswith('#### '):
            city = line[4:].strip()
        elif line.startswith('!') and region and state and city:
            # Table row: ![logo](path) | Name | [site](url)
            parts = line.split('|')
            site = parts[2].split('(')[1].strip().strip(')')
            name = parts[1].strip()
            logo = EMPRESAS_LOGO_PATH + parts[0].split(
                '(')[1].strip().strip(')')

            empresas.append({
                'nome': name,
                'regiao': region,
                'estado': state,
                'cidade': city,
                'site': site,
                'logo': logo,
            })

    return empresas


if __name__ == '__main__':
    for empresa in scrapping_empresas():
        # One JSON file per company, named "<company-slug>-<city-slug>.json"
        filename = '{0}-{1}.json'.format(
            slugify(empresa['nome']), slugify(empresa['cidade']))

        if not os.path.exists(PAGE_PATH):
            os.makedirs(PAGE_PATH)

        with open(os.path.join(PAGE_PATH, filename), 'w') as output:
            json.dump(empresa, output)
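
# Usage (a minimal sketch; assumes the `slugify` import above is provided by
# the python-slugify package and that the script runs from the site root):
#
#   pip install python-slugify
#   python empresas_generator.py
#
# Each company becomes one JSON file under content/empresas/; for example, a
# company named "Acme" in "São Paulo" would yield acme-sao-paulo.json
# (hypothetical names, shown only to illustrate the slugified filename).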