# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Purpose: Extract data from an ArcGIS Service, in chunks defined by
#          the service Max Record Count, to get around that limitation.
#          Requires that JSON is supported by the service.
# Author: Grant Herbert
#
# Created: 12/11/2014
# Copyright: (c) Grant Herbert 2014
# Licence: MIT License
# -------------------------------------------------------------------------------
"""
This software is designed for use with ArcGIS as a toolbox tool.
This software is distributed with an MIT License.
THIS SOFTWARE IS SUPPLIED AS-IS, WITH NO WARRANTY OR GUARANTEE, EXPLICIT OR IMPLICIT. THE AUTHORS
OF THIS SOFTWARE ASSUME NO LIABILITY FROM IMPROPER USE OR OPERATION.
"""

try:
    import sys
    import arcpy
    import urllib
    import urllib.request
    import urllib.parse
    import urllib.error
    import json
    import os
    import codecs
    import datetime
    import time
    import itertools
    import re
    import ssl
except ImportError as e:
    print(e)
    sys.exit()
# --------
# globals
arcpy.env.overwriteOutput = True
count_tries = 1
max_tries = 5
sleep_time = 2
ssl_context = ssl._create_unverified_context() # hacky workaround for SSL Cert issue - don't bother verifying SSL certs
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
# --------
def trace():
    import sys
    import traceback
    tb = sys.exc_info()[2]
    tbinfo = traceback.format_tb(tb)[0]  # script name + line number
    line = tbinfo.split(", ")[1]
    # Get Python syntax error
    synerror = traceback.format_exc().splitlines()[-1]
    return line, synerror

def output_msg(msg, severity=0):
    """ Adds a Message (in case this is run as a tool)
    and also prints the message to the screen (standard output)
    :param msg: text to output
    :param severity: 0 = none, 1 = warning, 2 = error
    """
    print(msg)
    # Split the message on \n first, so that if it's multiple lines,
    # a GPMessage will be added for each line
    try:
        for string in msg.split('\n'):
            # Add appropriate geoprocessing message
            if severity == 0:
                arcpy.AddMessage(string)
            elif severity == 1:
                arcpy.AddWarning(string)
            elif severity == 2:
                arcpy.AddError(string)
    except:
        pass

def test_url(url_to_test):
    """test a url for validity (non-404)
    :param url_to_test: String url to check
    """
    try:
        if urllib.request.urlopen(url_to_test):
            output_msg("Ho, a successful url test: {}".format(url_to_test))
            return url_to_test
    except urllib.error.HTTPError as e:
        if e.code == 404:
            output_msg("Arr, 404 error: {}".format(url_to_test))
        return None
    except urllib.error.URLError as e:
        return None

def get_adapter_name(url_string):
    """extract web adaptor name from endpoint
    :param url_string: url of service
    """
    u = urllib.parse.urlparse(url_string)
    if u.netloc.find('arcgis.com') > -1:
        # is an esri domain
        adapter_name = u.path.split("/")[2]  # third element
    else:
        adapter_name = u.path.split("/")[1]  # second element
    return adapter_name
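
# Example (hypothetical url): get_adapter_name("https://myserver.com/arcgis/rest/services")
# returns "arcgis", the web adaptor segment of the path.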

def get_referring_domain(url_string):
    """get referring domain part of url
    :param url_string: url of service
    """
    u = urllib.parse.urlparse(url_string)
    if u.netloc.find('arcgis.com') > -1:
        # is an esri domain
        ref_domain = r"https://www.arcgis.com"
    else:
        # generate from service url and hope it works
        if u.scheme == 'http':
            ref_domain = urllib.parse.urlunsplit(['https', u.netloc, '', '', ''])
        else:
            ref_domain = urllib.parse.urlunsplit([u.scheme, u.netloc, '', '', ''])
    return ref_domain
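
# Example (hypothetical url): get_referring_domain("http://myserver.com/arcgis/rest/services")
# returns "https://myserver.com" (scheme upgraded to https).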

def get_token(username, password, referer, adapter_name, client_type='requestip', expiration=240):
    """ Get Esri access token. Uses requestip by default
    :param username: valid username
    :param password: valid password
    :param referer: referer url
    :param adapter_name: name of the arcgis server adapter
    :param client_type: token client type, 'referer' or 'requestip' (default 'requestip')
    :param expiration: optional validity time in minutes (default 240)
    """
    query_dict = {'username': username,
                  'password': password,
                  'expiration': str(expiration),
                  'client': client_type,
                  'referer': referer,
                  'f': 'json'}
    # check for ArcGIS token generator url
    token_url = None
    token_url_array = [referer + r"/sharing/rest/generateToken",
                       referer + r"/" + adapter_name + r"/tokens/generateToken"]
    for url2test in token_url_array:
        if test_url(url2test):
            token_url = url2test
            break
    if token_url:
        token_response = urllib.request.urlopen(token_url, urllib.parse.urlencode(query_dict).encode('utf-8'))
        token_json = json.loads(token_response.read(), strict=False)
    else:
        token_json = {"error": "unable to get token"}
    if "token" in token_json:
        token = token_json['token']
        return token
    else:
        output_msg(
            "Avast! The scurvy gatekeeper says 'Could not generate a token with the username and password provided'. Check yer login details are shipshape!",
            severity=2)
        if "error" in token_json:
            output_msg(token_json["error"], severity=2)
        elif "message" in token_json:
            output_msg(token_json['message'], severity=2)
        raise ValueError("Token Error")
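
# Minimal usage sketch, assuming a hypothetical server and credentials:
#   token = get_token('some_user', 'some_pass', 'https://myserver.com', 'arcgis')
#   tokenstring = '&token=' + token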

def get_all_the_layers(service_endpoint, tokenstring):
    """walk the endpoint and extract feature layer or map layer urls
    :param service_endpoint: starting url
    :param tokenstring: string containing token for authentication
    """
    service_call = urllib.request.urlopen(service_endpoint + '?f=json' + tokenstring).read()
    if service_call:
        service_layer_info = json.loads(service_call, strict=False)
        if service_layer_info.get('error'):
            raise Exception("Gaaar, 'service_call' failed to access {0}".format(service_endpoint))
    else:
        raise Exception("Gaaar, 'service_call' failed to access {0}".format(service_endpoint))
    service_version = service_layer_info.get('currentVersion')
    service_layers_to_walk = []
    service_layers_to_get = []
    # search any folders (skipping the Utilities folder)
    if 'folders' in service_layer_info.keys() and len(service_layer_info.get('folders')) > 0:
        catalog_folder = service_layer_info.get('folders')
        folder_list = [f for f in catalog_folder if f.lower() != 'utilities']
        for folder_name in folder_list:
            output_msg("Ahoy, I be searching {} for hidden treasure...".format(folder_name), severity=0)
            lyr_list = get_all_the_layers(service_endpoint + '/' + folder_name, tokenstring)
            if lyr_list:
                service_layers_to_walk.extend(lyr_list)
    # get list of service urls
    if 'services' in service_layer_info.keys() and len(service_layer_info.get('services')) > 0:
        catalog_services = service_layer_info.get('services')
        for service in catalog_services:
            servicetype = service['type']
            servicename = service['name']
            if servicetype in ['MapServer', 'FeatureServer']:
                service_url = service_endpoint + '/' + servicename + '/' + servicetype
                if servicename.find('/') > -1:
                    folder, sname = servicename.split('/')
                    if service_endpoint.endswith(folder):
                        service_url = service_endpoint + '/' + sname + '/' + servicetype
                service_layers_to_walk.append(service_url)
    if len(service_layers_to_walk) == 0:
        # no services or folders
        service_layers_to_walk.append(service_endpoint)
    for url in service_layers_to_walk:
        # go get the json and walk down until you get all the layer urls
        service_call = json.load(urllib.request.urlopen(url + '?f=json' + tokenstring))
        # for getting all the layers, start with a list of sublayers
        service_layers = None
        service_layer_type = None
        if service_call.get('layers'):
            service_layers = service_call.get('layers')
            service_layer_type = 'layers'
        elif service_call.get('subLayers'):
            service_layers = service_call.get('subLayers')
            service_layer_type = 'sublayers'
            # subLayers is an array of objects, each has an id
        if service_layers is not None:
            # has sub layers, get em all
            for lyr in service_layers:
                if not lyr.get('subLayerIds'):  # ignore group layers
                    lyr_id = str(lyr.get('id'))
                    if service_layer_type == 'layers':
                        sub_layer_url = url + '/' + lyr_id
                        lyr_list = get_all_the_layers(sub_layer_url, tokenstring)
                        if lyr_list:
                            service_layers_to_walk.extend(lyr_list)
                        else:
                            # add the full url
                            service_layers_to_get.append(sub_layer_url)
                    elif service_layer_type == 'sublayers':
                        # handled differently, drop the parent layer id and use the sublayer id
                        sub_endpoint = url.rsplit('/', 1)[0]
                        sub_layer_url = sub_endpoint + '/' + lyr_id
                        lyr_list = get_all_the_layers(sub_layer_url, tokenstring)
                        if lyr_list:
                            service_layers_to_walk.extend(lyr_list)
                        else:
                            service_layers_to_get.append(sub_layer_url)
        else:
            # no sub layers, check if group layer
            if service_call.get('type'):
                if not service_call.get('type') in ("Group Layer", "Raster Layer"):
                    service_layers_to_get.append(url)
    return service_layers_to_get
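
# Example result, for a hypothetical two-layer map service:
#   ['https://myserver.com/arcgis/rest/services/Parcels/MapServer/0',
#    'https://myserver.com/arcgis/rest/services/Parcels/MapServer/1']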

def get_data(query):
    """ Download the data and return it as a JSON object.
    Automatically retries up to max_tries times.
    :param query: url query string
    """
    global count_tries
    global max_tries
    global sleep_time
    try:
        response = urllib.request.urlopen(query).read()  # get a byte str by default
        if response:
            try:
                response = response.decode('utf-8')  # convert to unicode
            except UnicodeDecodeError:
                response = response.decode('unicode-escape')  # convert to unicode
            # load to json and check for error
            resp_json = json.loads(response)
            if resp_json.get('error'):
                output_msg(resp_json['error'])
            return resp_json
        else:
            return {'error': 'no response received'}
    except Exception as e:
        output_msg(str(e), severity=1)
        # sleep and try again
        if hasattr(e, 'errno') and e.errno == 10054:
            # connection forcibly closed, extra sleep pause
            time.sleep(sleep_time)
        time.sleep(sleep_time)
        count_tries += 1
        if count_tries > max_tries:
            count_tries = 0
            output_msg("Avast! Error: ACCESS_FAILED")
            return None
        else:
            output_msg("Hold fast, attempt {0} of {1}".format(count_tries, max_tries))
            return get_data(query=query)
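
# Minimal usage sketch (hypothetical layer url); returns None after max_tries failures:
#   resp = get_data("https://myserver.com/arcgis/rest/services/Parcels/MapServer/0"
#                   "/query?where=1%3D1&outFields=*&f=json")
#   if resp and resp.get('features'):
#       ...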

def combine_data(fc_list, output_fc):
    """ Combine the downloaded datafiles into one;
    fastest approach is to use a cursor.
    Will drop the spatial index on the destination for larger inputs to try and speed up inserts.
    :param fc_list: array of featureclass paths as strings
    :param output_fc: path to output dataset
    """
    count_fc = len(fc_list)
    drop_spatial = False  # whether to drop the spatial index before loading
    is_spatial = arcpy.Describe(fc_list[0]).dataType
    if count_fc > 50 and is_spatial == 'FeatureClass':  # larger inputs
        drop_spatial = True
    if count_fc == 1:
        # simple case
        arcpy.Copy_management(fc_list[0], output_fc)
        output_msg("Created {0}".format(output_fc))
    else:
        for fc in fc_list:
            if fc_list.index(fc) == 0:
                # copy the first dataset, then append to it. much faster
                output_msg("Prepping yer first dataset {0}".format(fc))
                if arcpy.Exists(output_fc):
                    output_msg("Avast! {0} exists, deleting...".format(output_fc), severity=1)
                    arcpy.Delete_management(output_fc)
                arcpy.Copy_management(fc, output_fc)  # create dataset to append to
                output_msg("Created {0}".format(output_fc))
                if drop_spatial:
                    # delete the spatial index for better loading
                    output_msg("Dropping spatial index for loading performance")
                    arcpy.management.RemoveSpatialIndex(output_fc)
                fieldlist = []
                fields = arcpy.ListFields(output_fc)
                for field in fields:
                    if field.name.lower() == u'shape':
                        fieldlist.insert(0, "SHAPE@")  # add shape token to start
                    else:
                        fieldlist.append(field.name)
                insert_rows = arcpy.da.InsertCursor(output_fc, fieldlist)
            else:
                search_rows = arcpy.da.SearchCursor(fc, fieldlist)  # append to first dataset
                for row in search_rows:
                    insert_rows.insertRow(row)
                del row, search_rows
                output_msg("Appended {0}...".format(fc))
        if drop_spatial:
            # recreate the spatial index
            output_msg("Adding spatial index")
            arcpy.management.AddSpatialIndex(output_fc)
        del insert_rows

def grouper(iterable, n, fillvalue=None):
    """ Cut iterable into n sized groups
    from the itertools documentation, may not be most efficient, fillvalue causes an issue
    :param iterable: object to iterate over
    :param n: int value to group
    :param fillvalue: value to fill with if chunk smaller than n
    """
    args = [iter(iterable)] * n
    return itertools.zip_longest(*args, fillvalue=fillvalue)
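
# Behaviour sketch: grouper([1, 2, 3, 4, 5], 2) yields (1, 2), (3, 4), (5, None).
# The trailing None padding is why the caller walks each chunk backwards to find
# the real last oid.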

def create_layer_file(service_info, service_name, layer_source, output_folder):
    """
    write out a layer file from service renderer information
    :param service_info: json (to extract the drawingInfo from)
    :param service_name: String
    :param layer_source: String path to file
    :param output_folder: String path
    """
    # TODO: duplicate this functionality for Pro
    try:
        render_info = {"drawingInfo": {"renderer": {}}}
        if 'drawingInfo' in service_info:
            render_info["drawingInfo"]['renderer'] = service_info.get('drawingInfo').get('renderer')
            render_file = os.path.join(output_folder, service_name + "_renderer.txt")
            with open(render_file, 'w') as r_file:
                json.dump(render_info, r_file)
            output_msg("Yar! {0} Service renderer stashed in '{1}'".format(service_name, render_file))
            layer_file_name = os.path.join(output_folder, service_name + ".lyrx")
            output_msg("Sketchin' yer layer, {}".format(layer_file_name))
            layer_temp = arcpy.MakeFeatureLayer_management(layer_source, service_name)
            arcpy.SaveToLayerFile_management(in_layer=layer_temp, out_layer=layer_file_name, is_relative_path="RELATIVE")
            lyr_file = arcpy.mp.LayerFile(layer_file_name)
            lyr_update = lyr_file.listLayers()[0]  # is a Layer type
            lyr_cim = lyr_update.getDefinition('V2')  # get CIM definition
            symbCIM1 = lyr_cim.renderer.symbol.symbol.symbolLayers  # may be 2 - outline and fill, depends on type
            num_symbol = len(symbCIM1)
            renderer = render_info["drawingInfo"]["renderer"]
            # super complicated - may have color, may not, it's a right mess
            if num_symbol == 2:  # polygon?
                symb_out = symbCIM1[0]
                symb_fill = symbCIM1[1]
                symb_fill.color.values = renderer["symbol"]["color"]
                if "outline" in renderer["symbol"]:
                    symb_out.color.values = renderer["symbol"]["outline"]["color"]
                    symb_out.width = renderer["symbol"]["outline"]["width"]  # only if polygon
            else:
                symbCIM1[0].size = renderer["symbol"]["size"]
            lyr_update.setDefinition(lyr_cim)  # push the edited CIM back to the layer
            lyr_file.save()
            output_msg("Stashed yer layer, {}".format(layer_file_name))
        else:
            output_msg("Gaar, no renderer t' sketch from, so no layer file fer ya")
    except Exception as e:
        output_msg(str(e), severity=1)
        output_msg("Failed yer layer file drawin'")

def make_service_name(service_info, output_workspace, output_folder_path_len):
    global service_output_name_tracking_list
    global output_type
    # establish a unique name that isn't too long
    # 160 character limit for filegeodatabase
    max_path_length = 230  # sanity length for windows systems
    if output_type == 'Workspace':
        max_name_len = 150  # based on fgdb
    else:
        max_name_len = max_path_length - output_folder_path_len
    parent_name = ''
    parent_id = ''
    service_name = service_info.get('name')
    service_id = str(service_info.get('id'))
    # clean up the service name (remove invalid characters)
    service_name_cl = service_name.encode('ascii', 'ignore').decode('ascii')  # strip any non-ascii characters that may cause an issue
    # remove multiple underscores and any other problematic characters
    service_name_cl = re.sub(r'[_]+', '_', arcpy.ValidateTableName(service_name_cl, output_workspace))
    service_name_cl = service_name_cl.rstrip('_')
    if len(service_name_cl) > max_name_len:
        service_name_cl = service_name_cl[:max_name_len]
    service_name_len = len(service_name_cl)
    if service_info.get('parentLayer'):
        parent_name = service_info.get('parentLayer').get('name')
        parent_id = str(service_info.get('parentLayer').get('id'))
    if output_folder_path_len + service_name_len > max_path_length:  # so it can be written to disc
        # shorten the service name
        max_len = max_path_length - output_folder_path_len
        if max_len < service_name_len:
            service_name_cl = service_name_cl[:max_len]
    # check if name already exists
    if service_name_cl not in service_output_name_tracking_list:
        service_output_name_tracking_list.append(service_name_cl)
    else:
        if service_name_cl + "_" + service_id not in service_output_name_tracking_list:
            service_name_cl += "_" + service_id
            service_output_name_tracking_list.append(service_name_cl)
        else:
            service_name_cl += parent_id + "_" + service_id
    return service_name_cl
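
# Example: a layer named "My Layer/Name!" bound for a file geodatabase comes out
# roughly as "My_Layer_Name" (invalid characters swapped by ValidateTableName,
# repeated underscores collapsed), with the layer id appended if that name is taken.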
#-------------------------------------------------
def main():
    global count_tries
    global max_tries
    global sleep_time
    global service_output_name_tracking_list
    global output_type
    start_time = datetime.datetime.today()
    try:
        # arcgis toolbox parameters
        service_endpoint = arcpy.GetParameterAsText(0)  # String - URL of Service endpoint, required
        output_workspace = arcpy.GetParameterAsText(1)  # String - gdb/folder to put the results, required
        max_tries = arcpy.GetParameter(2)  # Int - max number of retries allowed, required
        sleep_time = arcpy.GetParameter(3)  # Int - seconds to sleep between retries, required
        strict_mode = arcpy.GetParameter(4)  # Bool - JSON check True/False, required
        username = arcpy.GetParameterAsText(5)  # String - username, optional
        password = arcpy.GetParameterAsText(6)  # String - password, optional
        referring_domain = arcpy.GetParameterAsText(7)  # String - url of auth domain, optional
        existing_token = arcpy.GetParameterAsText(8)  # String - valid token value, optional
        query_str = arcpy.GetParameterAsText(9)  # String - valid SQL query string, optional
        sanity_max_record_count = 10000
        # to query by geometry need [xmin,ymin,xmax,ymax], spatial reference, and geometryType (eg esriGeometryEnvelope)
        service_output_name_tracking_list = []
        if service_endpoint == '':
            output_msg("Avast! Can't plunder nothing from an empty url! Time to quit.")
            sys.exit()
        if not type(strict_mode) is bool:
            strict_mode = True
        if not type(max_tries) is int:
            max_tries = int(max_tries)
        if not type(sleep_time) is int:
            sleep_time = int(sleep_time)
        if query_str:
            query_str = urllib.parse.quote(query_str)
        if output_workspace == '':
            output_workspace = os.getcwd()
        output_desc = arcpy.Describe(output_workspace)
        output_type = output_desc.dataType
        if output_type == "Folder":  # to folder
            output_folder = output_workspace
        else:
            output_folder = output_desc.path
        adapter_name = get_adapter_name(service_endpoint)
        token_client_type = 'requestip'
        if referring_domain != '':
            referring_domain = referring_domain.replace('http:', 'https:')
            token_client_type = 'referer'
        else:
            referring_domain = get_referring_domain(service_endpoint)
            if referring_domain == r"https://www.arcgis.com":
                token_client_type = 'referer'
        # build a generic opener with the user agent spoofed
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib.request.install_opener(opener)
        token = ''
        if username and not existing_token:
            token = get_token(username=username, password=password, referer=referring_domain,
                              adapter_name=adapter_name, client_type=token_client_type)
        elif existing_token:
            token = existing_token
        tokenstring = ''
        if len(token) > 0:
            tokenstring = '&token=' + token
        output_msg("Start the plunder! {0}".format(service_endpoint))
        output_msg("We be stashing the booty in {0}".format(output_workspace))
        service_layers_to_get = get_all_the_layers(service_endpoint, tokenstring)
        output_msg("Blimey, {} layers for the pillagin'".format(len(service_layers_to_get)))
        for slyr in service_layers_to_get:
            count_tries = 0
            downloaded_fc_list = []  # for file merging
            response = None
            current_iter = 0
            max_record_count = 0
            feature_count = 0
            final_fc = ''
            output_msg("Now pillagin' yer data from {0}".format(slyr))
            service_info_call = urllib.request.urlopen(slyr + '?f=json' + tokenstring).read()
            if service_info_call:
                service_info = json.loads(service_info_call, strict=False)
            else:
                raise Exception("'service_info_call' failed to access {0}".format(slyr))
            if not service_info.get('error'):
                # add url to info
                service_info[u'serviceURL'] = slyr
                # assume JSON supported
                supports_json = True
                if strict_mode:
                    # check JSON supported
                    supports_json = False
                    if 'supportedQueryFormats' in service_info:
                        supported_formats = service_info.get('supportedQueryFormats').split(",")
                        for data_format in supported_formats:
                            if data_format == "JSON":
                                supports_json = True
                                break
                    else:
                        output_msg('Strict mode scuttled, no supported formats')
                objectid_field = "OBJECTID"
                if 'fields' in service_info:
                    field_list = service_info.get('fields')
                    if field_list:
                        for field in field_list:
                            ftype = field.get('type')
                            if ftype == 'esriFieldTypeOID':
                                objectid_field = field.get('name')
                else:
                    output_msg("No field list - come about using {0}!".format(objectid_field))
                # get count
                if query_str == '':
                    feature_count_call = urllib.request.urlopen(slyr + '/query?where=1%3D1&returnCountOnly=true&f=pjson' + tokenstring).read()
                else:
                    feature_count_call = urllib.request.urlopen(slyr + '/query?where=' + query_str + '&returnCountOnly=true&f=pjson' + tokenstring).read()
                if feature_count_call:
                    feature_count = json.loads(feature_count_call)
                    service_info[u'FeatureCount'] = feature_count.get('count')
                service_name_cl = make_service_name(service_info, output_workspace, len(output_folder))
                info_filename = service_name_cl + "_info.txt"
                info_file = os.path.join(output_folder, info_filename)
                # write out the service info for reference
                with open(info_file, 'w') as i_file:
                    json.dump(service_info, i_file, sort_keys=True, indent=4, separators=(',', ': '))
                output_msg("Yar! {0} Service info stashed in '{1}'".format(service_name_cl, info_file))
                if supports_json:
                    try:
                        # to query using geometry: supply &geometry=, &geometryType= (eg esriGeometryEnvelope), &inSR=, plus spatial relationship and buffering
                        feat_data_query = r"/query?outFields=*&returnGeometry=true&returnIdsOnly=false&returnCountOnly=false&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Meter&maxAllowableOffset=&geometryPrecision=&outSR=&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&f=json" + tokenstring
                        if query_str == '':
                            feat_OIDLIST_query = r"/query?where=" + objectid_field + r"+%3E+0&returnGeometry=false&returnIdsOnly=true&returnCountOnly=false&returnExtentOnly=false&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Meter&outFields=&maxAllowableOffset=&geometryPrecision=&outSR=&orderByFields=&groupByFieldsForStatistics=&outStatistics=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&f=json" + tokenstring
                        else:
                            feat_OIDLIST_query = r"/query?where=" + query_str + r"&returnGeometry=false&returnIdsOnly=true&returnCountOnly=false&returnExtentOnly=false&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Meter&outFields=&maxAllowableOffset=&geometryPrecision=&outSR=&orderByFields=&groupByFieldsForStatistics=&outStatistics=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&f=json" + tokenstring
                        max_record_count = service_info.get('maxRecordCount')  # maximum number of records the service returns at once
                        if not max_record_count:
                            max_record_count = sanity_max_record_count  # service did not report one, fall back to the sanity cap
                        if max_record_count > sanity_max_record_count:
                            output_msg("{0} max records is a wee bit large, using {1} instead...".format(
                                max_record_count, sanity_max_record_count))
                            max_record_count = sanity_max_record_count
                        # extracting by the actual OID values is the safest way
                        feature_OIDs = None
                        feature_OID_query = json.loads(urllib.request.urlopen(slyr + feat_OIDLIST_query).read())
                        if feature_OID_query and 'objectIds' in feature_OID_query:
                            feature_OIDs = feature_OID_query["objectIds"]
                        else:
                            output_msg("Blast, no OID values: {}".format(feature_OID_query))
                        if feature_OIDs:
                            OID_count = len(feature_OIDs)
                            sortie_count = OID_count // max_record_count + (OID_count % max_record_count > 0)
                            output_msg("{0} records, in chunks of {1}, err, that be {2} sorties. Ready lads!".format(OID_count, max_record_count, sortie_count))
                            feature_OIDs.sort()
                            # chunk them
                            for group in grouper(feature_OIDs, max_record_count):
                                # reset count_tries
                                count_tries = 0
                                start_oid = group[0]
                                end_oid = group[max_record_count - 1]
                                if end_oid is None:  # reached the end of the iterables
                                    # loop through and find last oid, need this due to fillvalue of None in grouper
                                    for i in reversed(group):
                                        if i is not None:
                                            end_oid = i
                                            break
                                # >= is %3E%3D, <= is %3C%3D when url encoded
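                                # decoded, the clause reads:
                                #   where <objectid_field> >= <start_oid> AND <objectid_field> <= <end_oid>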
                                if query_str == '':
                                    where_clause = "&where={0}+%3E%3D+{1}+AND+{2}+%3C%3D+{3}".format(objectid_field,
                                                                                                     str(start_oid),
                                                                                                     objectid_field,
                                                                                                     str(end_oid))
                                else:
                                    where_clause = "&where={0}+AND+{1}+%3E%3D+{2}+AND+{3}+%3C%3D+{4}".format(query_str,
                                                                                                             objectid_field,
                                                                                                             str(start_oid),
                                                                                                             objectid_field,
                                                                                                             str(end_oid))
                                # response is a string of json with the attributes and geometry
                                query = slyr + feat_data_query + where_clause
                                response = get_data(query)  # expects a json object, None on repeated failure
                                if not response or not response.get('features'):
                                    raise ValueError("Abandon ship! Data access failed! Check what ye manag'd to plunder before failure.")
                                else:
                                    feature_dict = response["features"]  # load the features so we can check they are not empty
                                    if len(feature_dict) != 0:
                                        # convert response to a json file on disk, then to gdb/shapefile (is fast)
                                        # can hit long filename issue!!!!
                                        # look at an arcpy.FeatureSet() to hold the data;
                                        # some services produce JSON that errors a FeatureSet()
                                        ##fs = arcpy.FeatureSet()
                                        ##fs.load(response)
                                        out_JSON_name = service_name_cl + str(current_iter) + ".json"
                                        out_JSON_file = os.path.join(output_folder, out_JSON_name)
                                        with codecs.open(out_JSON_file, 'w', 'utf-8') as out_file:
                                            data = json.dumps(response, ensure_ascii=False)
                                            out_file.write(data)
                                        output_msg("Nabbed some json data fer ye: '{0}', oids {1} to {2}".format(out_JSON_name, start_oid, end_oid))
                                        if output_type == "Folder":
                                            out_file_name = service_name_cl + str(current_iter) + ".shp"
                                        else:
                                            out_file_name = service_name_cl + str(current_iter)
                                        out_geofile = os.path.join(output_workspace, out_file_name)
                                        output_msg("Converting yer json to {0}".format(out_geofile))
                                        # may not be needed if using a FeatureSet()
                                        arcpy.JSONToFeatures_conversion(out_JSON_file, out_geofile)
                                        ##arcpy.JSONToFeatures_conversion(fs, out_geofile)
                                        downloaded_fc_list.append(out_geofile)
                                        os.remove(out_JSON_file)  # clean up the JSON file
                                        current_iter += 1
                        else:
                            raise ValueError("Aaar, plunderin' failed, feature OIDs is None")
                        # download complete, create a final output
                        if output_type == "Folder":
                            final_fc = os.path.join(output_workspace, service_name_cl + ".shp")
                        else:
                            final_fc = os.path.join(output_workspace, service_name_cl)
                        output_msg("Stashin' all the booty in '{0}'".format(final_fc))
                        # combine all the data
                        combine_data(fc_list=downloaded_fc_list, output_fc=final_fc)
                        ##create_layer_file(service_info=service_info, service_name=service_name_cl, layer_source=final_fc, output_folder=output_folder)
                        elapsed_time = datetime.datetime.today() - start_time
                        output_msg("{0} plundered in {1}".format(final_fc, str(elapsed_time)))
                    except ValueError as e:
                        output_msg(str(e), severity=2)
                    except Exception as e:
                        line, err = trace()
                        output_msg("Script Error\n{0}\n on {1}".format(err, line), severity=2)
                        output_msg(arcpy.GetMessages())
                    finally:
                        if arcpy.Exists(final_fc):
                            data_count = int(arcpy.GetCount_management(final_fc)[0])
                            if data_count == OID_count:  # we got it all
                                output_msg("Scrubbing the decks...")
                                for fc in downloaded_fc_list:
                                    arcpy.Delete_management(fc)
                            else:
                                output_msg("Splicin' the data failed - found {0} but expected {1}. Check {2} to see what went wrong.".format(data_count, OID_count, final_fc))
                else:
                    # no JSON output
                    output_msg("Aaaar, ye service does not support JSON output. Can't do it.")
            else:
                # service info error
                output_msg("Error: {0}".format(service_info.get('error')), severity=2)
    except ValueError as e:
        output_msg("ERROR: " + str(e), severity=2)
    except Exception as e:
        if hasattr(e, 'errno') and e.errno == 10054:
            output_msg("ERROR: " + str(e), severity=2)
        else:
            line, err = trace()
            output_msg("Error\n{0}\n on {1}".format(err, line), severity=2)
            output_msg(arcpy.GetMessages())
    finally:
        elapsed_time = datetime.datetime.today() - start_time
        output_msg("Plunderin' done, in " + str(elapsed_time))


if __name__ == '__main__':
    main()