Merge old data

elefan-grenoble · Feb 16, 2020 · 59874eb · 59874eb
1 parent 4c486d8
commit 59874eb
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -68,6 +68,7 @@ pip install https://github.com/pyinstaller/pyinstaller/tarball/develop
 To generate an updated version of the .exe file, run the following command
 ````
 pyinstaller source/main/manage_supplier_data.py -F
+pyinstaller source/main/get_old_data.py -F
 ````
 
 The *.exe* file will be find in the dist directory.
@@ -106,6 +107,7 @@ Run the following command to know the parameters.
 
 ````
 python source/main/manage_supplier_data.py -h
+python source/main/get_old_data.py -h
 ````
 
 ## License

diff --git a/data/address_to_coords.csv b/data/address_to_coords.csv
@@ -102,3 +102,23 @@ SCI Les 3 cols Loubet 38930 LE MONESTIER DU PERCY;44.7913;5.663
 FROMAGERIE CHABERT 74150 VALLIÈRES;45.9009;5.9385
 LA FERME DE L ESPINASSE 38210 MONTAUD;45.2618;5.5599
 40 avenue Marcelin Berthelot Maison des agriculteurs 38000 GRENOBLE;45.1894;5.7039
+Allée du Commerce Équitable  32500 FLEURANCE;43.8546;0.6479
+ZA La Palun  26170 BUIS LES BARONNIES;44.2649;5.2633
+645 rue des champs  De la pierre 74540 ALBY SUR CHERAN;45.8204;6.0044
+23 avenue Reille  75014 PARIS 14;48.8247;2.3402
+Route d?Orcières 5500 ST LAURENT DU CROS;44.6477;6.1009
+450 Chemin de la Gustinière  Les Roches 38260 PAJAY;45.3505;5.1077
+185 chemin du Thuve  4700 ORAISON;43.9083;5.914
+9 chemin des Bérangers  Les Bérangers 26400 BEAUFORT SUR GERVANNE;44.7717;5.1455
+Microcosme SAS  59 avenue Albert Calmette 59910 BONDUES;50.7183;3.128
+50 chemin des brunieres  38210 LA RIVIERE;45.2376;5.5085
+Le Fay  38190 STE AGNES;45.221;5.9376
+Les gramailles  7360 ST MICHEL DE CHABRILLANOUX;44.8371;4.5953
+54 chemin du Murier  38410 ST MARTIN D'URIAGE;45.1505;5.8163
+53 boulevard victor hugo  44200 NANTES;47.2025;-1.5487
+Le moulin 38710 LAVARS;44.8573;5.6808
+8 montée des Usines  26150 DIE;44.7546;5.3745
+190 impasse du Teura 38190 BERNIN;45.2656;5.8778
+410 route des Saules 73110 ROTHERENS;45.4718;6.1357
+25 cours de Verdun 38200 VIENNE;45.5169;4.8692
+Route d’Orcières 5500 ST LAURENT DU CROS;44.6477;6.1009
diff --git a/source/main/get_old_data.py b/source/main/get_old_data.py
@@ -0,0 +1,38 @@
+import argparse
+import os
+import pandas as pd
+
+from source.main.address_to_coords import get_data_dir
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description='Transform supplier file to usable file by umap')
+    parser.add_argument('old_filename', type=str, help='Name of the previous data file to read from the data directory')
+    parser.add_argument('new_filename', type=str, help='Name of the new data file to read from the data directory')
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = get_args()
+
+    # Read data
+    old_data = pd.read_csv(os.path.join(get_data_dir(), args.old_filename), encoding='cp1252', sep=';')
+    new_data = pd.read_csv(os.path.join(get_data_dir(), args.new_filename), encoding='cp1252', sep=';')
+
+    # Fill new data with old ones
+    columns_to_keep = ['Adresse 1', 'Adresse 2', 'C.Postal', 'Ville', 'RaisonSociale', 'Pays', 'Site',
+                       'Date partenariat', 'Rayon', 'Date mise à jour', 'Description', 'Slogan', 'Photo', 'Produits']
+    new_data = pd.merge(new_data, old_data[columns_to_keep], on='RaisonSociale', how='left', suffixes=('', '_old'))
+
+    # Fix missing data
+    for col in ['Adresse 1', 'Adresse 2', 'C.Postal', 'Ville']:
+        new_data[col] = new_data[col].fillna(new_data[col + '_old'])
+        del new_data[col + '_old']
+
+    # Fix type
+    new_data['C.Postal'] = new_data['C.Postal'].fillna(0)
+    new_data['C.Postal'] = new_data['C.Postal'].astype(int)
+
+    # Export modified new file
+    filename = args.new_filename[:-4] + '_modified.csv'
+    new_data.to_csv(os.path.join(get_data_dir(), filename), encoding='cp1252', index=None, sep=';')
diff --git a/source/main/manage_supplier_data.py b/source/main/manage_supplier_data.py
@@ -28,10 +28,10 @@ def get_args():
             address = row['Adresse 1'] if not pd.isnull(row['Adresse 1']) else row['RaisonSociale']
             if not pd.isnull(row['Adresse 2']):
                 address += ' ' + row['Adresse 2']
-            if pd.isnull(row['C.Postal']):
+            if pd.isnull(row['C.Postal']) or int(row['C.Postal']) == 0:
                 continue
             address += ' ' + str(int(row['C.Postal'])) + ' ' + row['Ville']
-            location = addr2c.get_coordinates(address, row['Pays'])
+            location = addr2c.get_coordinates(address, 'France' if pd.isnull(row['Pays']) else row['Pays'])
             data.loc[index, 'lat'] = location[0]
             data.loc[index, 'lon'] = location[1]
     finally: