From b191962a4ea6471963426e3b9707d5af88fc2fa2 Mon Sep 17 00:00:00 2001 From: HarrisChu <1726587+HarrisChu@users.noreply.github.com> Date: Wed, 13 Apr 2022 15:20:31 +0800 Subject: [PATCH 1/3] add prefix for LDBC --- nebula_bench/cli.py | 55 +++++++++++++++++----- nebula_bench/controller.py | 27 +++++++++-- nebula_bench/parser.py | 20 ++++++++ scripts/copy-data.py | 15 +++--- scripts/generate-data.sh | 2 +- templates/nebula-import-vid-int.yaml.j2 | 3 ++ templates/nebula-import-vid-string.yaml.j2 | 9 ++++ 7 files changed, 107 insertions(+), 24 deletions(-) diff --git a/nebula_bench/cli.py b/nebula_bench/cli.py index 5ee29ce..ac4f642 100644 --- a/nebula_bench/cli.py +++ b/nebula_bench/cli.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from email.policy import default import click from nebula_bench import setting @@ -12,11 +13,17 @@ def common(f): - f = click.option("-f", "--folder", help="ldbc data folder, default: target/data/test_data")(f) + f = click.option( + "-f", "--folder", help="ldbc data folder, default: target/data/test_data" + )(f) - f = click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")(f) + f = click.option( + "-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669" + )(f) f = click.option("-u", "--user", help="Nebula Graph address, default: root")(f) - f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(f) + f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")( + f + ) f = click.option( "-s", "--space", @@ -32,8 +39,12 @@ def cli(): @cli.command(help="generate and split ldbc data") -@click.option("-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1") -@click.option("-og", "--only-generate", default=False, is_flag=True, help="only generate data") +@click.option( + "-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1" +) +@click.option( + "-og", "--only-generate", default=False, is_flag=True, help="only generate data" +) @click.option( "-os", "--only-split", @@ -69,10 +80,14 @@ def nebula(): @click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669") @click.option("-u", "--user", help="Nebula Graph address, default: root") @click.option("-p", "--password", help="Nebula Graph address, default: nebula") -@click.option("-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2") +@click.option( + "-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2" +) def clean(address, user, password, keep): sc = NebulaController(user=user, password=password, address=address) - value = click.confirm("Will delete all spaces in Nebula Graph. Continue?", abort=True) + value = click.confirm( + "Will delete all spaces in Nebula Graph. Continue?", abort=True + ) sc.clean_spaces(keep) @@ -91,9 +106,18 @@ def clean(address, user, password, keep): is_flag=True, help="Dry run, just dump the import config file, default: False", ) -def importer(folder, address, user, password, space, vid_type, dry_run): +@click.option( + "-p", + "--enable-prefix", + default=False, + is_flag=True, + help="enable add prefix in vid, vid type should be string", +) +def importer(folder, address, user, password, space, vid_type, enable_prefix, dry_run): assert vid_type in ["int", "string"], 'the vid type should be "ini" or "string" ' - nc = NebulaController(folder, space, user, password, address, vid_type) + nc = NebulaController( + folder, space, user, password, address, vid_type, enable_prefix + ) c = nc.import_space(dry_run) if c != 0: exit(c) @@ -122,7 +146,16 @@ def stress(): ) @click.option("--args", help="extend args for test tool") def run( - folder, address, user, password, space, vid_type, scenario, controller, args, dry_run + folder, + address, + user, + password, + space, + vid_type, + scenario, + controller, + args, + dry_run, ): stress = StressFactory.gen_stress( _type=controller, @@ -133,7 +166,7 @@ def run( space=space, vid_type=vid_type, scenarios=scenario, - args = args, + args=args, dry_run=dry_run, ) stress.run() diff --git a/nebula_bench/controller.py b/nebula_bench/controller.py index 078af1b..1717c57 100644 --- a/nebula_bench/controller.py +++ b/nebula_bench/controller.py @@ -9,7 +9,9 @@ class BaseController(object): - def __init__(self, data_folder=None, space=None, user=None, password=None, address=None): + def __init__( + self, data_folder=None, space=None, user=None, password=None, address=None + ): self.workspace_path = setting.WORKSPACE_PATH self.data_folder = data_folder or setting.DATA_FOLDER self.data_folder = Path(self.data_folder) @@ -21,7 +23,14 @@ def __init__(self, data_folder=None, space=None, user=None, password=None, addre class NebulaController(BaseController): def __init__( - self, data_folder=None, space=None, user=None, password=None, address=None, vid_type=None + self, + data_folder=None, + space=None, + user=None, + password=None, + address=None, + vid_type=None, + enable_prefix=None, ): super().__init__( data_folder=data_folder, @@ -31,6 +40,7 @@ def __init__( address=address, ) self.vid_type = vid_type or "int" + self.enable_prefix = enable_prefix def import_space(self, dry_run=False): result_file = self.dump_nebula_importer() @@ -40,10 +50,15 @@ def import_space(self, dry_run=False): return 0 def dump_nebula_importer(self): - _type = "int64" if self.vid_type == "int" else "fixed_string(20)" + kwargs = {} + if self.enable_prefix and self.vid_type == 'int': + raise Exception("must use prefix with vid type string") + else: + kwargs["enable_prefix"] = self.enable_prefix + p = parser.Parser(parser.NebulaDumper, self.data_folder) dumper = p.parse() - kwargs = {} + kwargs["space"] = self.space kwargs["user"] = self.user kwargs["password"] = self.password @@ -85,7 +100,9 @@ def load_scenarios(self, scenario): class_name=scenario, ) else: - return utils.load_class(package_name, load_all=True, base_class=BaseScenario) + return utils.load_class( + package_name, load_all=True, base_class=BaseScenario + ) def run(self, nebula_scenario): result_folder = "target/result" diff --git a/nebula_bench/parser.py b/nebula_bench/parser.py index 39e5709..990e66c 100644 --- a/nebula_bench/parser.py +++ b/nebula_bench/parser.py @@ -7,6 +7,19 @@ from nebula_bench import setting from nebula_bench.utils import jinja_dump +prefix_map = { + "comment": "c-", + "forum": "f-", + "organisation": "o-", + "person": "p-", + "place": "l-", + "post": "s-", + "tag": "t-", + "tagclass": "g-", + "emailaddress": "ea-", + "language": "lg-", +} + class PropTypeEnum(enum.Enum): INT = "int" @@ -43,6 +56,7 @@ def __init__(self, name=None, index=None): Base.__init__(self, name, index) self.path = None self.prop_list = [] + self.prefix = None class Edge(Base): @@ -50,6 +64,7 @@ def __init__(self, name=None, index=None): Base.__init__(self, name, index) self.src_index = self.dst_index = None self.src_name = self.dst_name = None + self.src_prefix = self.dst_prefix = None self.prop_list = [] @@ -91,6 +106,7 @@ def parse_vertex(self, file_path): vertex = Vertex(name) vertex.path = str(file_path.absolute()) + vertex.prefix = prefix_map.get(name.lower(), "") header_path = Path(file_path.parent / (file_name + "_header.csv")) with open(str(header_path.absolute()), "r") as fl: @@ -157,9 +173,11 @@ def parse_edge(self, file_path): flag = not flag name = h.rsplit(".id", 1)[0].lower() edge.src_name, edge.src_index = name, index + edge.src_prefix = prefix_map.get(name, "") elif h.lower() == dst_vertex.lower() + ".id": name = h.rsplit(".id", 1)[0].lower() edge.dst_name, edge.dst_index = name, index + edge.dst_prefix = prefix_map.get(name, "") else: p = Prop() @@ -219,6 +237,7 @@ def __init__(self, parser, result_file=None, template_file=None): def dump(self, *args, **kwargs): vid_type = kwargs.pop("vid_type", "int") + enable_prefix = kwargs.pop("enable_prefix", False) if vid_type == "int": self.template_file = self.template_file or "nebula-import-vid-int.yaml.j2" elif vid_type == "string": @@ -230,6 +249,7 @@ def dump(self, *args, **kwargs): kwargs["edge_list"] = self._parser.edge_list kwargs["vertex_set"] = self._parser.vertex_set kwargs["edge_set"] = self._parser.edge_set + kwargs["enable_prefix"] = enable_prefix jinja_dump(self.template_file, self.result_file, kwargs) return self.result_file diff --git a/scripts/copy-data.py b/scripts/copy-data.py index fb07bea..a436cd9 100644 --- a/scripts/copy-data.py +++ b/scripts/copy-data.py @@ -8,7 +8,8 @@ _all_csv_files_copy = [] _all_csv_files_need_fix_title = [ 'static/place_isPartOf_place_header.csv.copy', - 'dynamic/person_knows_person_header.csv.copy'] + 'dynamic/person_knows_person_header.csv.copy', +] if __name__ == "__main__": argv = sys.argv[1:] @@ -22,14 +23,14 @@ _csv_dir = arg all_dir_list = os.listdir(_csv_dir) for dir in all_dir_list: - if os.path.isdir(_csv_dir+'/'+dir): - dir_list = os.listdir(_csv_dir+'/'+dir) + if os.path.isdir(_csv_dir + '/' + dir): + dir_list = os.listdir(_csv_dir + '/' + dir) for file in dir_list: if file.endswith('.csv'): - _all_csv_files.append(dir+'/'+file) + _all_csv_files.append(dir + '/' + file) elif file.endswith('.copy'): - _all_csv_files_copy.append(dir+'/'+file) + _all_csv_files_copy.append(dir + '/' + file) for dir in _all_csv_files: - os.remove(_csv_dir+dir) + os.remove(_csv_dir + dir) for dir in _all_csv_files_copy: - os.rename(_csv_dir+dir, _csv_dir+dir[:-5]) + os.rename(_csv_dir + dir, _csv_dir + dir[:-5]) diff --git a/scripts/generate-data.sh b/scripts/generate-data.sh index a5ca388..d2aabe4 100755 --- a/scripts/generate-data.sh +++ b/scripts/generate-data.sh @@ -59,7 +59,7 @@ echo "ldbc.snb.datagen.util.formatter.StringDateFormatter.dateTimeFormat:yyyy-MM # set this to the Hadoop 3.2.1 directory export HADOOP_HOME=${HADOOP_HOME} && \ export LDBC_SNB_DATAGEN_HOME=`pwd` && \ -sh run.sh && \ +bash run.sh && \ rm -rf ${DATA_DIR}/test_data && \ mv test_data ${DATA_DIR}/. diff --git a/templates/nebula-import-vid-int.yaml.j2 b/templates/nebula-import-vid-int.yaml.j2 index 651a8ad..ef05dcb 100644 --- a/templates/nebula-import-vid-int.yaml.j2 +++ b/templates/nebula-import-vid-int.yaml.j2 @@ -54,6 +54,7 @@ files: vid: index: {{ vertex.index }} type: int + tags: - name: {{ vertex.name }} props: @@ -81,9 +82,11 @@ files: srcVID: index: {{ edge.src_index }} type: int + dstVID: index: {{ edge.dst_index }} type: int + prefix: {{ edge.dst_prefix }} props: {%- for prop in edge.prop_list %} - name: {{ prop.name }} diff --git a/templates/nebula-import-vid-string.yaml.j2 b/templates/nebula-import-vid-string.yaml.j2 index ecff1cd..6eb685b 100644 --- a/templates/nebula-import-vid-string.yaml.j2 +++ b/templates/nebula-import-vid-string.yaml.j2 @@ -54,6 +54,9 @@ files: vid: index: {{ vertex.index }} type: string + {%- if enable_prefix %} + prefix: {{ vertex.prefix }} + {% endif %} tags: - name: {{ vertex.name }} props: @@ -81,9 +84,15 @@ files: srcVID: index: {{ edge.src_index }} type: string + {%- if enable_prefix %} + prefix: {{ edge.src_prefix }} + {% endif %} dstVID: index: {{ edge.dst_index }} type: string + {%- if enable_prefix %} + prefix: {{ edge.dst_prefix }} + {% endif %} props: {%- for prop in edge.prop_list %} - name: {{ prop.name }} From d9cbef3a5c3f3eb256d94d608acbddefa809d900 Mon Sep 17 00:00:00 2001 From: HarrisChu <1726587+HarrisChu@users.noreply.github.com> Date: Wed, 13 Apr 2022 15:48:49 +0800 Subject: [PATCH 2/3] fix --- templates/nebula-import-vid-int.yaml.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/templates/nebula-import-vid-int.yaml.j2 b/templates/nebula-import-vid-int.yaml.j2 index ef05dcb..b5be328 100644 --- a/templates/nebula-import-vid-int.yaml.j2 +++ b/templates/nebula-import-vid-int.yaml.j2 @@ -86,7 +86,6 @@ files: dstVID: index: {{ edge.dst_index }} type: int - prefix: {{ edge.dst_prefix }} props: {%- for prop in edge.prop_list %} - name: {{ prop.name }} From 54238dcb3b2804ae659e1a178f9a00e045d02fe5 Mon Sep 17 00:00:00 2001 From: HarrisChu <1726587+HarrisChu@users.noreply.github.com> Date: Fri, 15 Apr 2022 09:14:55 +0800 Subject: [PATCH 3/3] fix prefix --- nebula_bench/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nebula_bench/parser.py b/nebula_bench/parser.py index 990e66c..63bdbe2 100644 --- a/nebula_bench/parser.py +++ b/nebula_bench/parser.py @@ -16,8 +16,8 @@ "post": "s-", "tag": "t-", "tagclass": "g-", - "emailaddress": "ea-", - "language": "lg-", + "emailaddress": "e-", + "language": "u-", }