Skip to content

Add support for grouping by different properties by label in Gremlin #115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 29, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -3,6 +3,10 @@
Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.

## Upcoming
- Add support for notebook variables in Sparql/Gremlin magic queries ([Link to PR](https://github.com/aws/graph-notebook/pull/113))
- Add support for grouping by different properties per label in Gremlin ([Link to PR](https://github.com/aws/graph-notebook/pull/115))
- Fix missing Boto3 dependency in setup.py ([Link to PR](https://github.com/aws/graph-notebook/pull/118))


## Release 2.1.0 (April 15, 2021)

2 changes: 1 addition & 1 deletion src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
@@ -340,7 +340,7 @@ def gremlin(self, line, cell, local_ns: dict = None):
parser.add_argument('query_mode', nargs='?', default='query',
help='query mode (default=query) [query|explain|profile]')
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
parser.add_argument('-g', '--group-by', default='T.label',
parser.add_argument('-g', '--group-by', type=str, default='T.label',
help='Property used to group nodes (e.g. code, T.region) default is T.label')
parser.add_argument('--store-to', type=str, default='', help='store query result to this variable')
parser.add_argument('--ignore-groups', action='store_true', default=False, help="Ignore all grouping options")
67 changes: 49 additions & 18 deletions src/graph_notebook/network/gremlin/GremlinNetwork.py
Original file line number Diff line number Diff line change
@@ -54,6 +54,10 @@ def parse_pattern_list_str(pattern_str: str) -> list:


def generate_id_from_dict(data: dict) -> str:
# Handle cases where user requests '~label' in valueMap step, since json can't serialize non-string keys
if T.label in data.keys():
data['label'] = data[T.label]
del data[T.label]
data_str = json.dumps(data, default=str)
hashed = hashlib.md5(data_str.encode())
generate_id = hashed.hexdigest()
@@ -93,8 +97,11 @@ def __init__(self, graph: MultiDiGraph = None, callbacks=None, label_max_length=
if graph is None:
graph = MultiDiGraph()
self.label_max_length = label_max_length
self.group_by_property = group_by_property
self.ignore_groups=ignore_groups
try:
self.group_by_property = json.loads(group_by_property)
except ValueError:
self.group_by_property = group_by_property
self.ignore_groups = ignore_groups
super().__init__(graph, callbacks)

def add_results_with_pattern(self, results, pattern_list: list):
@@ -271,35 +278,59 @@ def add_vertex(self, v):
if type(v) is Vertex:
node_id = v.id
title = v.label
if self.group_by_property in [T_LABEL, 'label']:
# This sets the group key to the label if either "label" is passed in or
# T.label is set in order to handle the default case of grouping by label
# when no explicit key is specified
group = v.label
elif self.group_by_property == 'id':
group = v.id
else:
group = ''
vertex_dict = v.__dict__
if not isinstance(self.group_by_property, dict): # Handle string format group_by
if self.group_by_property in [T_LABEL, 'label']: # this handles if it's just a string
# This sets the group key to the label if either "label" is passed in or
# T.label is set in order to handle the default case of grouping by label
# when no explicit key is specified
group = v.label
elif self.group_by_property == 'id':
group = v.id
else:
group = ''
else: # handle dict format group_by
try:
if str(v.label) in self.group_by_property:
if self.group_by_property[str(v.label)]['groupby'] in [T_LABEL, 'label']:
group = v.label
else:
group = vertex_dict[self.group_by_property[str(v.label)]['groupby']]
elif str(v.id) in self.group_by_property:
group = vertex_dict[self.group_by_property[str(v.id)]['groupby']]
else:
group = ''
except KeyError:
group = ''

label = title if len(title) <= self.label_max_length else title[:self.label_max_length - 3] + '...'
data = {'label': label, 'title': title, 'group': group, 'properties': {'id': node_id, 'label': title}}
elif type(v) is dict:
properties = {}

title = ''
label = ''
group = ''
# Before looping through the properties, we first look for T.label in the vertex dict and set the title from it.
# The title is needed to look up the vertex label's desired grouping behavior in group_by_property;
# if the title were only set once the loop reaches T.label, that lookup would hit a KeyError for any earlier key.
if T.label in v.keys():
title = str(v[T.label])
label = title if len(title) <= self.label_max_length else title[:self.label_max_length - 3] + '...'
for k in v:
if str(k) == T_LABEL:
title = str(v[k])
label = title if len(title) <= self.label_max_length else title[:self.label_max_length - 3] + '...'
elif str(k) == T_ID:
if str(k) == T_ID:
node_id = str(v[k])
properties[k] = v[k]
if str(k) == self.group_by_property:
if isinstance(self.group_by_property, dict):
try:
if str(k) == self.group_by_property[title]['groupby']:
group = str(v[k])
except KeyError:
continue
elif str(k) == self.group_by_property:
group = str(v[k])

# handle when there is no id in a node. In this case, we will generate one which
# is consistently regenerated so that duplicate dicts will be dedubed to the same vertex.
# is consistently regenerated so that duplicate dicts will be reduced to the same vertex.
if node_id == '':
node_id = f'{generate_id_from_dict(v)}'

Loading