Add Neptune Summary API support #457

Merged: 6 commits, Mar 9, 2023

Changes from all commits
1 change: 1 addition & 0 deletions ChangeLog.md
@@ -7,6 +7,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
- Path: 04-Machine-Learning > Sample-Applications > 03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb
- New openCypher Language Tutorial notebooks
- Path: 06-Language-Tutorials > 02-openCypher
- Added support for Neptune Summary API ([Link to PR](https://github.com/aws/graph-notebook/pull/457))
- Added `--profile-misc-args` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/443))
- Added error messaging for incompatible host-specific `%%graph_notebook_config` parameters ([Link to PR](https://github.com/aws/graph-notebook/pull/456))
- Ensure default assignments for all Gremlin nodes when using grouping ([Link to PR](https://github.com/aws/graph-notebook/pull/448))
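For orientation, a minimal usage sketch of what the Summary API entry adds, assuming a notebook already configured against a Neptune cluster; the magics and flags come from the graph_magic.py diff below, and the stored variable name is illustrative:

# Basic graph summary through the existing %statistics magic
%statistics --summary

# Dedicated %summary line magic: detailed view, stored for later cells
%summary propertygraph --detailed --store-to summary_result

# A later cell can then inspect the stored response, e.g.
# summary_result['payload']['graphSummary']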
56 changes: 46 additions & 10 deletions src/graph_notebook/magics/graph_magic.py
@@ -42,7 +42,8 @@
from graph_notebook.neptune.client import ClientBuilder, Client, VALID_FORMATS, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, STATISTICS_LANGUAGE_INPUTS, STATISTICS_MODES
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
STATISTICS_LANGUAGE_INPUTS, STATISTICS_MODES, SUMMARY_MODES
from graph_notebook.network import SPARQLNetwork
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
@@ -403,26 +404,61 @@ def statistics(self, line, local_ns: dict = None):
help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. '
f'Default: propertygraph.',
choices=STATISTICS_LANGUAGE_INPUTS)
parser.add_argument('-m', '--mode', type=str, default='status',
parser.add_argument('-m', '--mode', type=str, default='',
help=f'The action to perform on the statistics endpoint. Valid inputs: {STATISTICS_MODES}. '
f'Default: status')
f'Default: `basic` if `--summary` is specified, otherwise `status`.')
parser.add_argument('--summary', action='store_true', default=False, help="Retrieves the graph summary.")
parser.add_argument('--silent', action='store_true', default=False, help="Display no output.")
parser.add_argument('--store-to', type=str, default='')

args = parser.parse_args(line.split())

if args.mode not in STATISTICS_MODES:
print(f'Invalid mode. Please specify one of: {STATISTICS_MODES}, or leave blank to retrieve status.')
mode = args.mode

if not mode:
mode = 'basic' if args.summary else 'status'
elif (args.summary and mode not in SUMMARY_MODES) or (not args.summary and mode not in STATISTICS_MODES):
err_endpoint_type, err_mode_list, err_default_mode = ("summary", SUMMARY_MODES[1:], "basic summary view") \
if args.summary else ("statistics", STATISTICS_MODES[1:], "status")
print(f'Invalid {err_endpoint_type} mode. Please specify one of: {err_mode_list}, '
f'or leave blank to retrieve {err_default_mode}.')
return

statistics_res = self.client.statistics(args.language, args.mode)
statistics_res = self.client.statistics(args.language, args.summary, mode)
statistics_res.raise_for_status()
res = statistics_res.json()
statistics_res_json = statistics_res.json()
if not args.silent:
print(json.dumps(res, indent=2))
print(json.dumps(statistics_res_json, indent=2))

if args.store_to != '' and local_ns is not None:
local_ns[args.store_to] = res
local_ns[args.store_to] = statistics_res_json

@line_magic
def summary(self, line, local_ns: dict = None):
parser = argparse.ArgumentParser()
parser.add_argument('language', nargs='?', type=str.lower, default="propertygraph",
help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. '
f'Default: propertygraph.',
choices=STATISTICS_LANGUAGE_INPUTS)
parser.add_argument('--detailed', action='store_true', default=False,
help="Toggles the display of structures fields on or off in the output. If not supplied, "
"we will default to the basic summary display mode.")
parser.add_argument('--silent', action='store_true', default=False, help="Display no output.")
parser.add_argument('--store-to', type=str, default='')

args = parser.parse_args(line.split())
if args.detailed:
mode = "detailed"
else:
mode = "basic"

summary_res = self.client.statistics(args.language, True, mode)
summary_res.raise_for_status()
summary_res_json = summary_res.json()
if not args.silent:
print(json.dumps(summary_res_json, indent=2))

if args.store_to != '' and local_ns is not None:
local_ns[args.store_to] = summary_res_json

@line_magic
def graph_notebook_host(self, line):
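Both magics delegate to the reworked Client.statistics method in the next file. A hedged sketch of the mapping, assuming `client` is an already-built Client instance:

# %summary                    -> summary view, mode resolved to 'basic'
res = client.statistics('propertygraph', True)

# %summary --detailed         -> detailed summary view
res = client.statistics('propertygraph', True, 'detailed')

# %statistics (no --summary)  -> statistics status endpoint, as before
res = client.statistics('propertygraph', False, 'status')

res.raise_for_status()
print(res.json())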
35 changes: 22 additions & 13 deletions src/graph_notebook/neptune/client.py
@@ -109,8 +109,9 @@
GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0']
GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1']

STATISTICS_MODES = ["status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
STATISTICS_LANGUAGE_INPUTS = ["propertygraph", "pg", "gremlin", "sparql", "rdf"]
STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
SUMMARY_MODES = ["", "basic", "detailed"]
STATISTICS_LANGUAGE_INPUTS = ["propertygraph", "pg", "gremlin", "oc", "opencypher", "sparql", "rdf"]


def is_allowed_neptune_host(hostname: str, host_allowlist: list):
@@ -744,22 +745,30 @@ def _query_status(self, language: str, *, query_id: str = '', **kwargs) -> reque
res = self._http_session.send(req, verify=self.ssl_verify)
return res

def statistics(self, language: str, mode: str = '') -> requests.Response:
def statistics(self, language: str, summary: bool = False, mode: str = '') -> requests.Response:
headers = {
'Accept': 'application/json'
}
if language in ["pg", "gremlin"]:
language = "propertygraph"
elif language == "rdf":
language = "sparql"
if language in ["gremlin", "oc", "opencypher"]:
language = "pg"
elif language == "sparql":
language = "rdf"

url = f'{self._http_protocol}://{self.host}:{self.port}/{language}/statistics'
if mode in ['', 'status']:
req = self._prepare_request('GET', url, headers=headers)
elif mode == 'delete':
req = self._prepare_request('DELETE', url, headers=headers)
data = {'mode': mode}

if summary:
summary_url = url + '/summary'
if not data['mode']:
data['mode'] = 'basic'
req = self._prepare_request('GET', summary_url, data=json.dumps(data), headers=headers)
else:
data = {'mode': mode}
req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers)
if mode in ['', 'status']:
req = self._prepare_request('GET', url, headers=headers)
elif mode == 'delete':
req = self._prepare_request('DELETE', url, headers=headers)
else:
req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers)
res = self._http_session.send(req)
return res

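At the HTTP level, the reworked method prepares the following request shapes; a hedged sketch using plain requests, with host and port as placeholders and the URL forms taken from the diff above (any IAM request signing done by _prepare_request is omitted):

import json
import requests

headers = {'Accept': 'application/json'}
base = 'https://my-neptune-host:8182/pg/statistics'  # placeholder endpoint; use /rdf/statistics for RDF

# Summary view: GET on the /summary sub-resource, mode defaulting to 'basic'
requests.get(base + '/summary', data=json.dumps({'mode': 'basic'}), headers=headers)

# Statistics status ('' or 'status'): plain GET; 'delete': DELETE
requests.get(base, headers=headers)
requests.delete(base, headers=headers)

# Any other statistics mode (e.g. 'refresh'): POST with a mode payload
requests.post(base, data=json.dumps({'mode': 'refresh'}), headers=headers)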
6 changes: 3 additions & 3 deletions test/integration/iam/statistics/test_statistics_with_iam.py
@@ -36,7 +36,7 @@ def test_statistics_disable_autocompute(self, lang):
expected = {
"status": "200 OK"
}
disable_res = self.client.statistics(lang, 'disableAutoCompute')
disable_res = self.client.statistics(lang, False, 'disableAutoCompute')
assert disable_res.status_code == 200
disable_status = disable_res.json()
self.assertEqual(disable_status, expected)
@@ -52,7 +52,7 @@ def test_statistics_enable_autocompute(self, lang):
expected = {
"status": "200 OK"
}
enable_res = self.client.statistics(lang, 'enableAutoCompute')
enable_res = self.client.statistics(lang, False, 'enableAutoCompute')
assert enable_res.status_code == 200
enable_status = enable_res.json()
self.assertEqual(enable_status, expected)
@@ -82,7 +82,7 @@ def test_statistics_delete(self, lang):
"statisticsId": -1
}
}
res = self.client.statistics(lang, 'delete')
res = self.client.statistics(lang, False, 'delete')
assert res.status_code == 200
statistics_status = res.json()
self.assertEqual(statistics_status, expected)
82 changes: 82 additions & 0 deletions test/integration/iam/statistics/test_summary_with_iam.py
@@ -0,0 +1,82 @@
"""
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: Apache-2.0
"""
import pytest
from botocore.session import get_session

from test.integration import IntegrationTest
from parameterized import parameterized

lang_list = ["pg", "sparql"]
summary_detailed_fields_pg = ["nodeStructures", "edgeStructures"]
summary_detailed_fields_rdf = ["subjectStructures"]


class TestSummaryWithIAM(IntegrationTest):
def setUp(self) -> None:
super().setUp()
self.client = self.client_builder.with_iam(get_session()).build()

@pytest.mark.neptune
@pytest.mark.iam
@parameterized.expand(lang_list)
def test_summary_default(self, lang):
expected_payload_fields = ['version', 'lastStatisticsComputationTime', 'graphSummary']
res = self.client.statistics(lang, True)
assert res.status_code == 200
summary_default = res.json()
self.assertEqual(summary_default['status'], '200 OK')
res_payload_fields = list(summary_default['payload'].keys())
for x in expected_payload_fields:
self.assertIn(x, res_payload_fields)

@pytest.mark.neptune
@pytest.mark.iam
def test_summary_basic_pg(self):
res = self.client.statistics("pg", True, "basic")
assert res.status_code == 200
summary_pg_basic = res.json()
self.assertEqual(summary_pg_basic['status'], '200 OK')
summary_pg_fields = list(summary_pg_basic['payload']['graphSummary'].keys())

self.assertIn("numNodes", summary_pg_fields)
for x in summary_detailed_fields_pg:
self.assertNotIn(x, summary_pg_fields)

@pytest.mark.neptune
@pytest.mark.iam
def test_summary_basic_rdf(self):
res = self.client.statistics("rdf", True, "basic")
assert res.status_code == 200
summary_rdf_basic = res.json()
self.assertEqual(summary_rdf_basic['status'], '200 OK')
summary_rdf_fields = list(summary_rdf_basic['payload']['graphSummary'].keys())

self.assertIn("numDistinctSubjects", summary_rdf_fields)
for x in summary_detailed_fields_rdf:
self.assertNotIn(x, summary_rdf_fields)

@pytest.mark.neptune
@pytest.mark.iam
def test_summary_detailed_pg(self):
res = self.client.statistics("pg", True, "detailed")
assert res.status_code == 200
summary_pg_detailed = res.json()
self.assertEqual(summary_pg_detailed['status'], '200 OK')
summary_pg_fields = list(summary_pg_detailed['payload']['graphSummary'].keys())

for x in summary_detailed_fields_pg:
self.assertIn(x, summary_pg_fields)

@pytest.mark.neptune
@pytest.mark.iam
def test_summary_detailed_rdf(self):
res = self.client.statistics("rdf", True, "detailed")
assert res.status_code == 200
summary_rdf_detailed = res.json()
self.assertEqual(summary_rdf_detailed['status'], '200 OK')
summary_rdf_fields = list(summary_rdf_detailed['payload']['graphSummary'].keys())

for x in summary_detailed_fields_rdf:
self.assertIn(x, summary_rdf_fields)
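The new suite can be run the same way as the existing IAM statistics tests; a hedged example, assuming the integration test environment (Neptune endpoint, IAM credentials) is already configured as the other test/integration/iam suites expect:

import pytest

# Run only the Neptune IAM summary tests added in this PR
pytest.main(['-m', 'neptune and iam', '-v',
             'test/integration/iam/statistics/test_summary_with_iam.py'])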