-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdatedb.py
executable file
·465 lines (418 loc) · 17.6 KB
/
updatedb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
#!/usr/bin/python
# Author: Julian Brown
# Original Date: Sept 04, 2018
# License: MIT License, a copy of the license is in this repo
#
import sys
import getopt
import urllib
import sqlite3
import os
import json
import csv
#
# EliteTradeSearchPySqlite
#
# This is a set of scripts that contain information that helps playing the
# game Elite Dangerous.
#
# EliteTradeSearchPySqlite/updatedb.py
#
# This is one script of hopefully many that allow you to download the latest
# information from the Eddb.io site about the commodities, star systems, space
# stations, and commodity prices at those stations. This script will
# download those files and insert them into a SQLite database for easy
# perusal. The data gets old quickly, use this script to keep the database
# relevant for searching.
#
# Note: this should always be run in the EliteTradeSearchPySqlite directory as
# all of its sister scripts and files will be located here. This file may
# contain upwards of 20 gigs during operations.
#
# The files and format that this script downloads and inserts into the
# SQLiteDB.
#
# https://eddb.io/archive/v5/commodities.json
#
# This file is in JSON format and here are just a few lines from a typical
# download.
# [
# {
# "ed_id" : 128672162,
# "is_rare" : 0,
# "min_sell_price" : 1298,
# "sell_price_upper_average" : 13368,
# "is_non_marketable" : 0,
# "max_sell_price" : 15769,
# "max_buy_price" : null,
# "min_buy_price" : null,
# "average_price" : 11912,
# "id" : 334,
# "category_id" : 16,
# "buy_price_lower_average" : 0,
# "category" : {
# "name" : "Salvage",
# "id" : 16
# },
# "name" : "Gene Bank"
# },
#
# From the commodities file we are mostly interested in just the name and
# id, the id is used as a foreign key in the db.
#
# https://eddb.io/archive/v5/systems_populated.jsonl
#
# This is a jsonl file, where each line is a complete json object, read
# each line in one at a time, parse and insert.
#
#{"id":1,"edsm_id":12695,"name":"1 G. Caeli","x":80.90625,"y":-83.53125,"z":-30.8125,"population":6544826,"is_populated":true,"government_id":144,"government":"Patronage","allegiance_id":2,"allegiance":"Empire","state_id":16,"state":"Boom","security_id":32,"security":"Medium","primary_economy_id":4,"primary_economy":"Industrial","power":"Arissa Lavigny-Duval","power_state":"Exploited","power_state_id":32,"needs_permit":false,"updated_at":1536002636,"simbad_ref":"","controlling_minor_faction_id":31816,"controlling_minor_faction":"1 G. Caeli Empire League","reserve_type_id":3,"reserve_type":"Common","minor_faction_presences":[{"minor_faction_id":31816,"state_id":16,"influence":69.1692,"state":"Boom"},{"minor_faction_id":54517,"state_id":80,"influence":3.5035,"state":"None"},{"minor_faction_id":54518,"state_id":80,"influence":1.1011,"state":"None"},{"minor_faction_id":54519,"state_id":80,"influence":6.4064,"state":"None"},{"minor_faction_id":74917,"state_id":80,"influence":5.1051,"state":"None"},{"minor_faction_id":40897,"state_id":80,"influence":11.1111,"state":"None"},{"minor_faction_id":4017,"state_id":80,"influence":3.6036,"state":"None"}]}
#
# https://eddb.io/archive/v5/stations.jsonl
#
# This is a jsonl file, where each line is a complete json object, read
# each line in one at a time, parse and insert.
#
#{"id":5,"name":"Reilly Hub","system_id":396,"updated_at":1535563693,"max_landing_pad_size":"L","distance_to_star":171,"government_id":64,"government":"Corporate","allegiance_id":3,"allegiance":"Federation","state_id":80,"state":"None","type_id":8,"type":"Orbis Starport","has_blackmarket":false,"has_market":true,"has_refuel":true,"has_repair":true,"has_rearm":true,"has_outfitting":true,"has_shipyard":true,"has_docking":true,"has_commodities":true,"import_commodities":["Pesticides","Aquaponic Systems","Biowaste"],"export_commodities":["Mineral Oil","Fruit and Vegetables","Grain"],"prohibited_commodities":["Narcotics","Tobacco","Combat Stabilisers","Imperial Slaves","Slaves","Personal Weapons","Battle Weapons","Bootleg Liquor","Landmines"],"economies":["Agriculture"],"shipyard_updated_at":1535909043,"outfitting_updated_at":1535909043,"market_updated_at":1535909042,"is_planetary":false,"selling_ships":["Adder","Eagle Mk. II","Federal Dropship","Hauler","Sidewinder Mk. I","Viper Mk III","Cobra MK IV"],"selling_modules":[738,739,740,743,744,745,748,749,750,753,754,755,758,759,760,793,794,795,796,797,824,826,828,837,838,840,843,851,876,877,878,879,880,882,883,884,886,888,891,892,893,896,897,898,927,928,929,932,933,936,937,938,941,942,946,948,961,962,963,964,966,967,968,998,999,1003,1004,1007,1008,1012,1013,1016,1017,1018,1021,1022,1023,1027,1032,1036,1037,1038,1039,1041,1042,1043,1046,1047,1048,1066,1071,1072,1116,1117,1118,1119,1121,1122,1123,1132,1136,1137,1138,1181,1182,1186,1191,1192,1193,1194,1195,1196,1197,1200,1201,1202,1203,1204,1205,1207,1208,1209,1213,1214,1242,1243,1244,1245,1246,1286,1306,1307,1310,1311,1317,1320,1324,1325,1326,1327,1373,1375,1377,1379,1381,1417,1518,1519,1520,1523,1524,1525,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535,1544,1545,1577,1579,1581,1583,1585,1587,1597,1609,1657],"settlement_size_id":null,"settlement_size":null,"settlement_security_id":null,"settlement_security":null,"body_id":7086578,"controlling_minor_faction_id":13925}
#
# https://eddb.io/archive/v5/listings.csv
#
# This is a CSV file of the price of commodities at all the stations.
#
#id,station_id,commodity_id,supply,supply_bracket,buy_price,sell_price,demand,demand_bracket,collected_at
#1,1,5,0,0,0,744,1185,3,1535999254
#
# SQLite database file that receives all downloaded data (the schema is
# produced by getSchema, near the end of this file).
dbfile = 'tradesearch.db'

# Download descriptors.  Each one pairs the remote 'url' with the local
# 'fname' it is saved under, i.e. the two arguments of urllib.urlretrieve.
commodities_file = dict(
    url="https://eddb.io/archive/v5/commodities.json",
    fname='commodities.json',
)
systems_file = dict(
    url="https://eddb.io/archive/v5/systems_populated.jsonl",
    fname='systems_populated.jsonl',
)
stations_file = dict(
    url="https://eddb.io/archive/v5/stations.jsonl",
    fname='stations.jsonl',
)
prices_file = dict(
    url="https://eddb.io/archive/v5/listings.csv",
    fname='prices.csv',
)
def usage():
    """Print the command-line help text, then exit with status 1."""
    help_lines = (
        'updatedb.py -h -c -m -s -t -p -a or --createdb --commodities --systems --stations --prices --all',
        ' -h - help',
        ' -c - create database (or re-create it)',
        ' -m - download and install commodities',
        ' -s - download and install systems',
        ' -t - download and install stations',
        ' -p - download and install prices',
        ' -a - do all of the above',
    )
    for text in help_lines:
        print(text)
    sys.exit(1)
# Shared connection state used by do_connect: 'connected' becomes 1 once
# 'conn' holds an open sqlite3 connection (both start out as 0).
sqlite_access = { 'connected': 0, 'conn': 0 }


def do_connect():
    """Return the shared sqlite3 connection to dbfile, opening it on first use.

    The connection is cached in sqlite_access, so every later call returns
    the same object instead of reconnecting.
    """
    if sqlite_access['connected'] != 1:
        handle = sqlite_access['conn'] = sqlite3.connect(dbfile)
        # isolation_level None turns off the module's implicit transactions;
        # the loaders in this file issue BEGIN/COMMIT/ROLLBACK themselves.
        handle.isolation_level = None
        sqlite_access['connected'] = 1
    return sqlite_access['conn']
def do_createdb():
    """Create a brand-new database that contains only the schema.

    Any existing file named by dbfile is removed first, then the schema
    returned by getSchema is executed on the connection from do_connect.
    """
    print("Creating db")
    # start from scratch: remove the database left by an earlier run
    already_there = os.access(dbfile, os.F_OK)
    if already_there:
        os.remove(dbfile)
    connection = do_connect()
    connection.executescript(getSchema())
    connection.commit()
    print("Done")
def do_commodities():
    """Download the commodities JSON file and reload the commodities table.

    The file described by commodities_file is fetched with
    urllib.urlretrieve and parsed as one JSON array.  Inside a single
    explicit transaction the table is emptied and one row (id, name) is
    inserted per entry; each name is printed as it is inserted.

    On a database error "Failed" and the error are printed and the
    transaction is rolled back.
    """
    print("Downloading and inserting Commodities")
    urllib.urlretrieve(commodities_file['url'], commodities_file['fname'])
    conn = do_connect()
    c = conn.cursor()
    # the context manager closes the file even if parsing fails
    with open(commodities_file['fname']) as json_fh:
        json_data = json.load(json_fh)
    try:
        c.execute("BEGIN")
        c.execute('DELETE FROM commodities;')
        for item in json_data:
            commodity_id = item["id"]
            name = item["name"]
            print(name)
            c.execute('INSERT INTO commodities ("id", "name") VALUES(?, ?);', (commodity_id, name))
        c.execute("COMMIT")
        print("Done")
    except sqlite3.Error as e:
        # the original caught the undefined name "sql.Error", which turned
        # every database failure into a NameError and skipped the rollback
        print("Failed")
        print(e)
        c.execute("ROLLBACK")
def do_systems():
    """Download the populated star systems, reload Systems, and rebuild distances.

    Steps:
      1. fetch the JSONL file described by systems_file;
      2. inside one explicit transaction, empty the Systems table and insert
         one row per input line, while writing "index,id,x,y,z" for the same
         system to system_master_list.csv;
      3. write the number of systems to max_count.txt;
      4. run the external ./calc_distances program.

    On any error during step 2 "Failed" and the error are printed, the
    transaction is rolled back and the script exits with status 1.
    """
    print("Downloading and inserting Systems")
    urllib.urlretrieve(systems_file['url'], systems_file['fname'])
    conn = do_connect()
    c = conn.cursor()
    # One of the main questions asked of the star system data is:
    #
    #   "Get me all star systems within 150 light years of this system."
    #
    # That is both a computational and a disk space problem.
    #
    # Computing it on demand means running the distance calculation against
    # all of the approximately 21,000 star systems every time the question
    # is asked.
    #
    # Pre-computing it in the database is worse: a table of
    # (id1, id2, distance) rows needs 21,000 squared rows, in excess of
    # 441 million, and with at least one index that table alone could be
    # over 31 GB in size -- too expensive to create and too expensive to
    # query.
    #
    # So instead a C program, the fastest way to process this calculation,
    # writes a binary 2D array of 441 million unsigned shorts holding the
    # distances.  An unsigned short covers 0 to 65535 light years; rarely
    # will anyone select over 500 ly, so that is sufficient, whereas one
    # byte (0-255) would not be.
    #
    # system_master_list.csv lists the index, id, x, y and z of each system
    # and is what the C program reads to prepare that array.  The program
    # creates distance_matrix.bin, which is just under 1 GB.
    idx = 0
    # both files are managed by "with" so they are closed on success and on
    # the sys.exit() failure path alike
    with open("system_master_list.csv", 'w') as fo:
        try:
            # using BEGIN and COMMIT makes the insertions go much faster
            c.execute("BEGIN")
            c.execute('DELETE FROM Systems;')
            with open(systems_file["fname"], 'r') as f:
                for line_wlf in f:
                    line = line_wlf.rstrip('\n')
                    if not line.strip():
                        # tolerate blank lines instead of failing the import
                        continue
                    json_data = json.loads(line)
                    # JSON true/false decode to Python booleans, so the old
                    # comparison against the string "true" never matched;
                    # the string form is still accepted for compatibility
                    needs_permit = 1 if json_data["needs_permit"] in (True, "true") else 0
                    print("name :" + json_data["name"])
                    fields = (idx, json_data["id"], json_data["x"], json_data["y"], json_data["z"])
                    fo.write(",".join(str(value) for value in fields) + "\n")
                    # NOTE(review): the CSV row above carries the 0-based
                    # index while the database row below stores the already
                    # incremented (1-based) value -- kept as-is; confirm
                    # which one the consumers of idx expect.
                    idx = idx + 1
                    c.execute(
                        'INSERT INTO Systems ("idx", "id", "edsm_id", "name", "x", "y", "z", "needs_permit") VALUES(?, ?, ?, ?, ?, ?, ?, ?);',
                        (idx, json_data["id"], json_data["edsm_id"], json_data["name"], json_data["x"], json_data["y"], json_data["z"], needs_permit))
            c.execute("COMMIT")
            print("Done")
        except Exception as e:
            print("Failed")
            print(e)
            c.execute("ROLLBACK")
            sys.exit(1)
    # max_count.txt is the maximum size of the array of systems
    with open("max_count.txt", "w") as fo:
        fo.write(str(idx))
    # now run the external C program
    print("Calling calc_distances")
    os.system("./calc_distances")
    print("returned from calc_distances")
def do_stations():
    """Download the stations (where ships can land) and reload the stations table.

    The JSONL file described by stations_file is fetched and read one line
    at a time; inside one explicit transaction the table is emptied and one
    row is inserted per station.  A missing distance_to_star is stored as
    100000 and a missing max_landing_pad_size as "S".

    On any error "Failed" and the error are printed and the transaction is
    rolled back.
    """
    print("Downloading and inserting Stations")
    urllib.urlretrieve(stations_file['url'], stations_file['fname'])
    conn = do_connect()
    c = conn.cursor()
    try:
        c.execute("BEGIN")
        c.execute('DELETE FROM stations;')
        # the context manager closes the file on success and on failure
        with open(stations_file["fname"], 'r') as f:
            for line_wlf in f:
                line = line_wlf.rstrip('\n')
                if not line.strip():
                    # tolerate blank lines instead of failing the import
                    continue
                json_data = json.loads(line)
                # JSON true/false decode to Python booleans, so the old
                # comparison against the string "true" never matched; the
                # string form is still accepted for compatibility
                is_planetary = 1 if json_data["is_planetary"] in (True, "true") else 0
                # substitute defaults for JSON nulls before inserting
                distance_to_star = json_data["distance_to_star"]
                if distance_to_star is None:
                    distance_to_star = 100000
                max_landing_pad_size = json_data["max_landing_pad_size"]
                if max_landing_pad_size is None:
                    max_landing_pad_size = "S"
                print("name :" + json_data["name"])
                c.execute(
                    'INSERT INTO stations ("id", "name", "system_id", "updated_at", "max_landing_pad_size", "distance_to_star", "is_planetary") VALUES(?, ?, ?, ?, ?, ?, ?);',
                    (json_data["id"], json_data["name"], json_data["system_id"], json_data["updated_at"], max_landing_pad_size, distance_to_star, is_planetary))
        c.execute("COMMIT")
        print("Done")
    except Exception as e:
        print("Failed")
        print(e)
        c.execute("ROLLBACK")
def do_prices():
    """Download the price listings CSV (a very large file) and reload prices.

    The CSV described by prices_file is fetched, its header row is skipped,
    and every data row is inserted inside one explicit transaction after the
    table has been emptied.  Progress is printed every 10000 rows.

    On any error "Failed" and the error are printed and the transaction is
    rolled back.
    """
    print("Downloading and inserting Prices")
    urllib.urlretrieve(prices_file['url'], prices_file['fname'])
    conn = do_connect()
    c = conn.cursor()
    try:
        # BEGIN and COMMIT are critical, as this would take weeks
        # to insert otherwise (up to 4 million rows)
        c.execute("BEGIN")
        c.execute('DELETE FROM prices;')
        # the context manager closes the file on success and on failure
        with open(prices_file["fname"], 'r') as f:
            csvreader = csv.reader(f, delimiter=',', quotechar='|')
            # the first row is the column header, not data
            next(csvreader, None)
            count = 0
            for row in csvreader:
                if not row:
                    # csv yields [] for a blank line; skipping it keeps one
                    # stray empty line from rolling back the whole load
                    continue
                # columns 4 and 8 (supply_bracket, demand_bracket) are not stored
                c.execute(
                    'INSERT INTO prices ("id", "station_id", "commodity_id", "supply", "buy_price", "sell_price", "demand", "collected_at") VALUES(?, ?, ?, ?, ?, ?, ?, ?);',
                    (row[0], row[1], row[2], row[3], row[5], row[6], row[7], row[9]))
                count = count + 1
                if count % 10000 == 0:
                    print("Count " + str(count))
        c.execute("COMMIT")
        print("Done")
    except Exception as e:
        print("Failed")
        print(e)
        c.execute("ROLLBACK")
def process_args(argv):
    """Parse the command-line options in argv and run the requested actions.

    This is the main routine.  An unknown option or -h prints the usage
    text (which exits).  -c/--createdb and -a/--all are synonyms and select
    every action; the remaining options select a single loader.  Selected
    actions always run in the order createdb, commodities, systems,
    stations, prices.  With nothing selected, 'Nothing to do' and the usage
    text are printed.
    """
    try:
        opts, _ = getopt.getopt(argv, "hcmstpa", ["createdb", "commodities", "systems", "stations", "prices", "all"])
    except getopt.GetoptError:
        usage()

    every_action = ("createdb", "commodities", "systems", "stations", "prices")
    # options that switch on exactly one loader
    single_action = {
        "-m": "commodities", "--commodities": "commodities",
        "-s": "systems", "--systems": "systems",
        "-t": "stations", "--stations": "stations",
        "-p": "prices", "--prices": "prices",
    }

    selected = set()
    for opt, _ in opts:
        if opt == '-h':
            usage()
        elif opt in ("-c", "--createdb", "-a", "--all"):
            # note -c and -a are synonyms
            selected.update(every_action)
        elif opt in single_action:
            selected.add(single_action[opt])

    # now do the actions, in this fixed order
    steps = (
        ("createdb", do_createdb),
        ("commodities", do_commodities),
        ("systems", do_systems),
        ("stations", do_stations),
        ("prices", do_prices),
    )
    for key, action in steps:
        if key in selected:
            action()

    if not selected:
        print('Nothing to do')
        usage()
def getSchema():
    """Return the SQL script that creates every table and index of the database.

    The schema is kept as the result of a routine so it is easy to feed to
    sqlite3's executescript.  It defines the tables stations, commodities,
    prices and Systems together with their indexes.

    The stations_name index is created on stations(name); it previously
    targeted Systems(name), which only duplicated systems_name and left
    station names unindexed.
    """
    schema = """CREATE TABLE `stations` (
    `id` INTEGER NOT NULL,
    `name` TEXT NOT NULL,
    `system_id` INTEGER NOT NULL,
    `updated_at` INTEGER NOT NULL,
    `max_landing_pad_size` TEXT NOT NULL,
    `distance_to_star` REAL NOT NULL,
    `is_planetary` INTEGER NOT NULL,
    PRIMARY KEY(`id`)
);
CREATE INDEX `stations_id` ON `stations` (
    `id`
);
CREATE INDEX `stations_system_id` ON `stations` (
    `system_id`
);
CREATE INDEX `stations_name` ON `stations` (
    `name`
);
CREATE TABLE `commodities` (
    `id` INTEGER NOT NULL,
    `name` TEXT NOT NULL
);
CREATE INDEX `commodities_id` ON `commodities` (
    `id`
);
CREATE INDEX `commodities_name` ON `commodities` (
    `name`
);
CREATE TABLE `prices` (
    `id` INTEGER NOT NULL,
    `station_id` INTEGER NOT NULL,
    `commodity_id` INTEGER NOT NULL,
    `supply` INTEGER NOT NULL,
    `demand` INTEGER NOT NULL,
    `buy_price` INTEGER NOT NULL,
    `sell_price` INTEGER NOT NULL,
    `collected_at` INTEGER NOT NULL,
    PRIMARY KEY(`id`)
);
CREATE INDEX `prices_station_id` ON `prices` (
    `station_id`
);
CREATE INDEX `prices_collected_at` ON `prices` (
    `collected_at`
);
CREATE INDEX `prices_commodity_id` ON `prices` (
    `commodity_id`
);
CREATE INDEX `prices_id` ON `prices` (
    `id`,
    `commodity_id`
);
CREATE TABLE IF NOT EXISTS "Systems" (
    `id` INTEGER NOT NULL,
    `edsm_id` INTEGER NOT NULL,
    `idx` INTEGER NOT NULL DEFAULT 0,
    `name` TEXT NOT NULL,
    `x` REAL NOT NULL,
    `y` REAL NOT NULL,
    `z` REAL NOT NULL,
    `needs_permit` INTEGER NOT NULL,
    PRIMARY KEY(`id`)
);
CREATE INDEX `systems_name` ON `Systems` (
    `name`
);
CREATE INDEX `systems_edsm_id` ON `Systems` (
    `edsm_id`
);
CREATE INDEX `systems_index_id` ON `Systems` (
    `idx`,
    `id`
);
CREATE INDEX `systems_id` ON `Systems` (
    `id`,
    `idx`
);"""
    return schema
# Call the main routine only when this file is executed as a script, so that
# importing it as a module does not start downloads or modify the database.
if __name__ == "__main__":
    process_args(sys.argv[1:])