Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add prefix for LDBC #63

Merged
merged 3 commits into from
Apr 15, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions nebula_bench/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from email.policy import default
import click

from nebula_bench import setting
Expand All @@ -12,11 +13,17 @@


def common(f):
f = click.option("-f", "--folder", help="ldbc data folder, default: target/data/test_data")(f)
f = click.option(
"-f", "--folder", help="ldbc data folder, default: target/data/test_data"
)(f)

f = click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")(f)
f = click.option(
"-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669"
)(f)
f = click.option("-u", "--user", help="Nebula Graph address, default: root")(f)
f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(f)
f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(
f
)
f = click.option(
"-s",
"--space",
Expand All @@ -32,8 +39,12 @@ def cli():


@cli.command(help="generate and split ldbc data")
@click.option("-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1")
@click.option("-og", "--only-generate", default=False, is_flag=True, help="only generate data")
@click.option(
"-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1"
)
@click.option(
"-og", "--only-generate", default=False, is_flag=True, help="only generate data"
)
@click.option(
"-os",
"--only-split",
Expand Down Expand Up @@ -69,10 +80,14 @@ def nebula():
@click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")
@click.option("-u", "--user", help="Nebula Graph user, default: root")
@click.option("-p", "--password", help="Nebula Graph password, default: nebula")
@click.option(
    "-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2"
)
def clean(address, user, password, keep):
    # Command callback: asks for confirmation, then delegates to
    # NebulaController.clean_spaces(keep).
    #
    # address / user / password: connection settings forwarded to the
    #     controller unchanged.
    # keep: raw value of --keep (per its help text, a comma separated list
    #     of space names); passed through as-is to clean_spaces.
    #
    # Plain comments are used instead of a docstring so that click's
    # generated --help output for this command is not altered.
    sc = NebulaController(user=user, password=password, address=address)
    # abort=True makes click raise Abort when the user declines, so the
    # return value carries no extra information and is not stored.
    click.confirm("Will delete all spaces in Nebula Graph. Continue?", abort=True)
    sc.clean_spaces(keep)


Expand All @@ -91,9 +106,18 @@ def clean(address, user, password, keep):
is_flag=True,
help="Dry run, just dump the import config file, default: False",
)
@click.option(
    # No short flag here: "-p" is already declared for --password in
    # `common`, and this callback receives `password`, so it presumably
    # shares those options (TODO confirm). Registering "-p" twice on one
    # command leaves only one of the two reachable through the short form.
    "--enable-prefix",
    default=False,
    is_flag=True,
    help="enable add prefix in vid, vid type should be string",
)
def importer(folder, address, user, password, space, vid_type, enable_prefix, dry_run):
    # Command callback: builds a NebulaController from the CLI options and
    # runs import_space(dry_run); a non-zero result becomes the process
    # exit status.
    #
    # vid_type must be "int" or "string"; anything else is rejected up
    # front. enable_prefix is forwarded to the controller as-is.
    #
    # Plain comments are used instead of a docstring so that click's
    # generated --help output for this command is not altered.

    # Raise explicitly rather than `assert`: assertions are stripped under
    # `python -O`, and click turns BadParameter into a clean usage error
    # instead of a traceback.
    if vid_type not in ("int", "string"):
        raise click.BadParameter('the vid type should be "int" or "string"')

    nc = NebulaController(
        folder, space, user, password, address, vid_type, enable_prefix
    )
    c = nc.import_space(dry_run)
    if c != 0:
        # `exit` is a helper injected by the `site` module; raising
        # SystemExit gives the same exit status without relying on it.
        raise SystemExit(c)
Expand Down Expand Up @@ -122,7 +146,16 @@ def stress():
)
@click.option("--args", help="extend args for test tool")
def run(
folder, address, user, password, space, vid_type, scenario, controller, args, dry_run
folder,
address,
user,
password,
space,
vid_type,
scenario,
controller,
args,
dry_run,
):
stress = StressFactory.gen_stress(
_type=controller,
Expand All @@ -133,7 +166,7 @@ def run(
space=space,
vid_type=vid_type,
scenarios=scenario,
args = args,
args=args,
dry_run=dry_run,
)
stress.run()
Expand Down
27 changes: 22 additions & 5 deletions nebula_bench/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@


class BaseController(object):
def __init__(self, data_folder=None, space=None, user=None, password=None, address=None):
def __init__(
self, data_folder=None, space=None, user=None, password=None, address=None
):
self.workspace_path = setting.WORKSPACE_PATH
self.data_folder = data_folder or setting.DATA_FOLDER
self.data_folder = Path(self.data_folder)
Expand All @@ -21,7 +23,14 @@ def __init__(self, data_folder=None, space=None, user=None, password=None, addre

class NebulaController(BaseController):
def __init__(
self, data_folder=None, space=None, user=None, password=None, address=None, vid_type=None
self,
data_folder=None,
space=None,
user=None,
password=None,
address=None,
vid_type=None,
enable_prefix=None,
):
super().__init__(
data_folder=data_folder,
Expand All @@ -31,6 +40,7 @@ def __init__(
address=address,
)
self.vid_type = vid_type or "int"
self.enable_prefix = enable_prefix

def import_space(self, dry_run=False):
result_file = self.dump_nebula_importer()
Expand All @@ -40,10 +50,15 @@ def import_space(self, dry_run=False):
return 0

def dump_nebula_importer(self):
_type = "int64" if self.vid_type == "int" else "fixed_string(20)"
kwargs = {}
if self.enable_prefix and self.vid_type == 'int':
raise Exception("must use prefix with vid type string")
else:
kwargs["enable_prefix"] = self.enable_prefix

p = parser.Parser(parser.NebulaDumper, self.data_folder)
dumper = p.parse()
kwargs = {}

kwargs["space"] = self.space
kwargs["user"] = self.user
kwargs["password"] = self.password
Expand Down Expand Up @@ -85,7 +100,9 @@ def load_scenarios(self, scenario):
class_name=scenario,
)
else:
return utils.load_class(package_name, load_all=True, base_class=BaseScenario)
return utils.load_class(
package_name, load_all=True, base_class=BaseScenario
)

def run(self, nebula_scenario):
result_folder = "target/result"
Expand Down
20 changes: 20 additions & 0 deletions nebula_bench/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
from nebula_bench import setting
from nebula_bench.utils import jinja_dump

# String prefix per entity name, keyed by the lower-cased name.
# The parser looks names up with `prefix_map.get(name, "")`, so any name
# not listed here ends up with an empty prefix.
# Prefixes are kept distinct from one another.
prefix_map = {
    "comment": "c-",
    "forum": "f-",
    "organisation": "o-",
    "person": "p-",
    "place": "l-",
    "post": "s-",
    "tag": "t-",
    "tagclass": "g-",
    "emailaddress": "ea-",
    "language": "lg-",
}


class PropTypeEnum(enum.Enum):
INT = "int"
Expand Down Expand Up @@ -43,13 +56,15 @@ def __init__(self, name=None, index=None):
Base.__init__(self, name, index)
self.path = None
self.prop_list = []
self.prefix = None


class Edge(Base):
    """Edge record: Base's name/index plus source/destination endpoint fields.

    Every endpoint field starts as ``None`` and ``prop_list`` starts empty;
    they are filled in later by whoever builds the edge.
    """

    def __init__(self, name=None, index=None):
        super().__init__(name, index)
        # Column index of each endpoint id.
        self.src_index = None
        self.dst_index = None
        # Name of the vertex type at each endpoint.
        self.src_name = None
        self.dst_name = None
        # VID prefix for each endpoint.
        self.src_prefix = None
        self.dst_prefix = None
        # Edge properties, in the order they are appended.
        self.prop_list = []


Expand Down Expand Up @@ -91,6 +106,7 @@ def parse_vertex(self, file_path):

vertex = Vertex(name)
vertex.path = str(file_path.absolute())
vertex.prefix = prefix_map.get(name.lower(), "")

header_path = Path(file_path.parent / (file_name + "_header.csv"))
with open(str(header_path.absolute()), "r") as fl:
Expand Down Expand Up @@ -157,9 +173,11 @@ def parse_edge(self, file_path):
flag = not flag
name = h.rsplit(".id", 1)[0].lower()
edge.src_name, edge.src_index = name, index
edge.src_prefix = prefix_map.get(name, "")
elif h.lower() == dst_vertex.lower() + ".id":
name = h.rsplit(".id", 1)[0].lower()
edge.dst_name, edge.dst_index = name, index
edge.dst_prefix = prefix_map.get(name, "")

else:
p = Prop()
Expand Down Expand Up @@ -219,6 +237,7 @@ def __init__(self, parser, result_file=None, template_file=None):

def dump(self, *args, **kwargs):
vid_type = kwargs.pop("vid_type", "int")
enable_prefix = kwargs.pop("enable_prefix", False)
if vid_type == "int":
self.template_file = self.template_file or "nebula-import-vid-int.yaml.j2"
elif vid_type == "string":
Expand All @@ -230,6 +249,7 @@ def dump(self, *args, **kwargs):
kwargs["edge_list"] = self._parser.edge_list
kwargs["vertex_set"] = self._parser.vertex_set
kwargs["edge_set"] = self._parser.edge_set
kwargs["enable_prefix"] = enable_prefix

jinja_dump(self.template_file, self.result_file, kwargs)
return self.result_file
15 changes: 8 additions & 7 deletions scripts/copy-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
_all_csv_files_copy = []
_all_csv_files_need_fix_title = [
'static/place_isPartOf_place_header.csv.copy',
'dynamic/person_knows_person_header.csv.copy']
'dynamic/person_knows_person_header.csv.copy',
]

if __name__ == "__main__":
argv = sys.argv[1:]
Expand All @@ -22,14 +23,14 @@
_csv_dir = arg
all_dir_list = os.listdir(_csv_dir)
for dir in all_dir_list:
if os.path.isdir(_csv_dir+'/'+dir):
dir_list = os.listdir(_csv_dir+'/'+dir)
if os.path.isdir(_csv_dir + '/' + dir):
dir_list = os.listdir(_csv_dir + '/' + dir)
for file in dir_list:
if file.endswith('.csv'):
_all_csv_files.append(dir+'/'+file)
_all_csv_files.append(dir + '/' + file)
elif file.endswith('.copy'):
_all_csv_files_copy.append(dir+'/'+file)
_all_csv_files_copy.append(dir + '/' + file)
for dir in _all_csv_files:
os.remove(_csv_dir+dir)
os.remove(_csv_dir + dir)
for dir in _all_csv_files_copy:
os.rename(_csv_dir+dir, _csv_dir+dir[:-5])
os.rename(_csv_dir + dir, _csv_dir + dir[:-5])
2 changes: 1 addition & 1 deletion scripts/generate-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ echo "ldbc.snb.datagen.util.formatter.StringDateFormatter.dateTimeFormat:yyyy-MM
# set this to the Hadoop 3.2.1 directory
export HADOOP_HOME=${HADOOP_HOME} && \
export LDBC_SNB_DATAGEN_HOME=`pwd` && \
sh run.sh && \
bash run.sh && \
rm -rf ${DATA_DIR}/test_data && \
mv test_data ${DATA_DIR}/.

Expand Down
2 changes: 2 additions & 0 deletions templates/nebula-import-vid-int.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ files:
vid:
index: {{ vertex.index }}
type: int

tags:
- name: {{ vertex.name }}
props:
Expand Down Expand Up @@ -81,6 +82,7 @@ files:
srcVID:
index: {{ edge.src_index }}
type: int

dstVID:
index: {{ edge.dst_index }}
type: int
Expand Down
9 changes: 9 additions & 0 deletions templates/nebula-import-vid-string.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ files:
vid:
index: {{ vertex.index }}
type: string
{%- if enable_prefix %}
prefix: {{ vertex.prefix }}
{% endif %}
tags:
- name: {{ vertex.name }}
props:
Expand Down Expand Up @@ -81,9 +84,15 @@ files:
srcVID:
index: {{ edge.src_index }}
type: string
{%- if enable_prefix %}
prefix: {{ edge.src_prefix }}
{% endif %}
dstVID:
index: {{ edge.dst_index }}
type: string
{%- if enable_prefix %}
prefix: {{ edge.dst_prefix }}
{% endif %}
props:
{%- for prop in edge.prop_list %}
- name: {{ prop.name }}
Expand Down