Skip to content

Commit

Permalink
Merge old data
Browse files Browse the repository at this point in the history
  • Loading branch information
NicolasGrosjean committed Feb 16, 2020
1 parent 4c486d8 commit 59874eb
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 2 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ pip install https://github.com/pyinstaller/pyinstaller/tarball/develop
To generate an updated version of the .exe file, run the following command
````
pyinstaller source/main/manage_supplier_data.py -F
pyinstaller source/main/get_old_data.py -F
````

The *.exe* file will be found in the dist directory.
Expand Down Expand Up @@ -106,6 +107,7 @@ Run the following command to know the parameters.

````
python source/main/manage_supplier_data.py -h
python source/main/get_old_data.py -h
````

## License
Expand Down
20 changes: 20 additions & 0 deletions data/address_to_coords.csv
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,23 @@ SCI Les 3 cols Loubet 38930 LE MONESTIER DU PERCY;44.7913;5.663
FROMAGERIE CHABERT 74150 VALLIÈRES;45.9009;5.9385
LA FERME DE L ESPINASSE 38210 MONTAUD;45.2618;5.5599
40 avenue Marcelin Berthelot Maison des agriculteurs 38000 GRENOBLE;45.1894;5.7039
Allée du Commerce Équitable 32500 FLEURANCE;43.8546;0.6479
ZA La Palun 26170 BUIS LES BARONNIES;44.2649;5.2633
645 rue des champs De la pierre 74540 ALBY SUR CHERAN;45.8204;6.0044
23 avenue Reille 75014 PARIS 14;48.8247;2.3402
Route d?Orcières 5500 ST LAURENT DU CROS;44.6477;6.1009
450 Chemin de la Gustinière Les Roches 38260 PAJAY;45.3505;5.1077
185 chemin du Thuve 4700 ORAISON;43.9083;5.914
9 chemin des Bérangers Les Bérangers 26400 BEAUFORT SUR GERVANNE;44.7717;5.1455
Microcosme SAS 59 avenue Albert Calmette 59910 BONDUES;50.7183;3.128
50 chemin des brunieres 38210 LA RIVIERE;45.2376;5.5085
Le Fay 38190 STE AGNES;45.221;5.9376
Les gramailles 7360 ST MICHEL DE CHABRILLANOUX;44.8371;4.5953
54 chemin du Murier 38410 ST MARTIN D'URIAGE;45.1505;5.8163
53 boulevard victor hugo 44200 NANTES;47.2025;-1.5487
Le moulin 38710 LAVARS;44.8573;5.6808
8 montée des Usines 26150 DIE;44.7546;5.3745
190 impasse du Teura 38190 BERNIN;45.2656;5.8778
410 route des Saules 73110 ROTHERENS;45.4718;6.1357
25 cours de Verdun 38200 VIENNE;45.5169;4.8692
Route d’Orcières 5500 ST LAURENT DU CROS;44.6477;6.1009
38 changes: 38 additions & 0 deletions source/main/get_old_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import argparse
import os
import pandas as pd

from source.main.address_to_coords import get_data_dir


def get_args(argv=None):
    """Parse the command-line arguments of the old-data merge script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the arguments from ``sys.argv``,
            which is the behaviour existing callers rely on.

    Returns:
        argparse.Namespace: namespace with the ``old_filename`` and
        ``new_filename`` attributes, both file names given as strings.
    """
    # The description used to be copied from manage_supplier_data.py and did
    # not describe what this script does.
    parser = argparse.ArgumentParser(
        description='Fill a new supplier data file with the data of a previous one')
    parser.add_argument('old_filename', type=str,
                        help='Name of the previous data file to read from the data directory')
    parser.add_argument('new_filename', type=str,
                        help='Name of the new data file to read from the data directory')
    return parser.parse_args(argv)


if __name__ == '__main__':
    # Script entry point: complete a new supplier file with the values of a
    # previous one and write the result next to it as '<name>_modified.csv'.
    args = get_args()
    data_dir = get_data_dir()

    # Read data (both files are ';'-separated and cp1252-encoded)
    old_data = pd.read_csv(os.path.join(data_dir, args.old_filename), encoding='cp1252', sep=';')
    new_data = pd.read_csv(os.path.join(data_dir, args.new_filename), encoding='cp1252', sep=';')

    # Fill new data with old ones: a left merge on the supplier name keeps
    # every row of the new file; columns present in both files get the
    # '_old' suffix for the values coming from the old file.
    # NOTE(review): duplicated 'RaisonSociale' values in the old file would
    # duplicate rows of the new one - confirm the names are unique.
    columns_to_keep = ['Adresse 1', 'Adresse 2', 'C.Postal', 'Ville', 'RaisonSociale', 'Pays', 'Site',
                       'Date partenariat', 'Rayon', 'Date mise à jour', 'Description', 'Slogan', 'Photo', 'Produits']
    new_data = pd.merge(new_data, old_data[columns_to_keep], on='RaisonSociale', how='left', suffixes=('', '_old'))

    # Fix missing data: use the old address values where the new ones are empty
    for col in ['Adresse 1', 'Adresse 2', 'C.Postal', 'Ville']:
        old_col = col + '_old'
        if old_col not in new_data.columns:
            # The new file did not have this column, so the merge already
            # brought the old values in under the unsuffixed name.
            continue
        new_data[col] = new_data[col].fillna(new_data[old_col])
        del new_data[old_col]

    # Fix type: missing postal codes become 0 so the column can be stored as int
    new_data['C.Postal'] = new_data['C.Postal'].fillna(0).astype(int)

    # Export modified new file; splitext copes with any extension (or none),
    # unlike slicing off the last four characters.
    base_name, _ = os.path.splitext(args.new_filename)
    filename = base_name + '_modified.csv'
    new_data.to_csv(os.path.join(data_dir, filename), encoding='cp1252', index=False, sep=';')
4 changes: 2 additions & 2 deletions source/main/manage_supplier_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ def get_args():
address = row['Adresse 1'] if not pd.isnull(row['Adresse 1']) else row['RaisonSociale']
if not pd.isnull(row['Adresse 2']):
address += ' ' + row['Adresse 2']
if pd.isnull(row['C.Postal']):
if pd.isnull(row['C.Postal']) or int(row['C.Postal']) == 0:
continue
address += ' ' + str(int(row['C.Postal'])) + ' ' + row['Ville']
location = addr2c.get_coordinates(address, row['Pays'])
location = addr2c.get_coordinates(address, 'France' if pd.isnull(row['Pays']) else row['Pays'])
data.loc[index, 'lat'] = location[0]
data.loc[index, 'lon'] = location[1]
finally:
Expand Down

0 comments on commit 59874eb

Please sign in to comment.