diff --git a/ChangeLog.md b/ChangeLog.md index fd5919bc..4f40052b 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -7,6 +7,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd - Path: 04-Machine-Learning > Sample-Applications > 03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb - New openCypher Language Tutorial notebooks - Path: 06-Language-Tutorials > 02-openCypher +- Added support for Neptune Summary API ([Link to PR](https://github.com/aws/graph-notebook/pull/457)) - Added `--profile-misc-args` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/443)) - Added error messaging for incompatible host-specific `%%graph_notebok_config` parameters ([Link to PR](https://github.com/aws/graph-notebook/pull/456)) - Ensure default assignments for all Gremlin nodes when using grouping ([Link to PR](https://github.com/aws/graph-notebook/pull/448)) diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index 996d8564..36feb24e 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -42,7 +42,8 @@ from graph_notebook.neptune.client import ClientBuilder, Client, VALID_FORMATS, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \ LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \ FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \ - NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, STATISTICS_LANGUAGE_INPUTS, STATISTICS_MODES + NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \ + STATISTICS_LANGUAGE_INPUTS, STATISTICS_MODES, SUMMARY_MODES from graph_notebook.network import SPARQLNetwork from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns @@ -403,26 +404,61 @@ def statistics(self, line, local_ns: dict = None): help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. ' f'Default: propertygraph.', choices=STATISTICS_LANGUAGE_INPUTS) - parser.add_argument('-m', '--mode', type=str, default='status', + parser.add_argument('-m', '--mode', type=str, default='', help=f'The action to perform on the statistics endpoint. Valid inputs: {STATISTICS_MODES}. ' - f'Default: status') + f'Default: `basic` if `--summary` is specified, otherwise `status`.') + parser.add_argument('--summary', action='store_true', default=False, help="Retrieves the graph summary.") parser.add_argument('--silent', action='store_true', default=False, help="Display no output.") parser.add_argument('--store-to', type=str, default='') args = parser.parse_args(line.split()) - - if args.mode not in STATISTICS_MODES: - print(f'Invalid mode. Please specify one of: {STATISTICS_MODES}, or leave blank to retrieve status.') + mode = args.mode + + if not mode: + mode = 'basic' if args.summary else 'status' + elif (args.summary and mode not in SUMMARY_MODES) or (not args.summary and mode not in STATISTICS_MODES): + err_endpoint_type, err_mode_list, err_default_mode = ("summary", SUMMARY_MODES[1:], "basic summary view") \ + if args.summary else ("statistics", STATISTICS_MODES[1:], "status") + print(f'Invalid {err_endpoint_type} mode. Please specify one of: {err_mode_list}, ' + f'or leave blank to retrieve {err_default_mode}.') return - statistics_res = self.client.statistics(args.language, args.mode) + statistics_res = self.client.statistics(args.language, args.summary, mode) statistics_res.raise_for_status() - res = statistics_res.json() + statistics_res_json = statistics_res.json() if not args.silent: - print(json.dumps(res, indent=2)) + print(json.dumps(statistics_res_json, indent=2)) if args.store_to != '' and local_ns is not None: - local_ns[args.store_to] = res + local_ns[args.store_to] = statistics_res_json + + @line_magic + def summary(self, line, local_ns: dict = None): + parser = argparse.ArgumentParser() + parser.add_argument('language', nargs='?', type=str.lower, default="propertygraph", + help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. ' + f'Default: propertygraph.', + choices=STATISTICS_LANGUAGE_INPUTS) + parser.add_argument('--detailed', action='store_true', default=False, + help="Toggles the display of structures fields on or off in the output. If not supplied, " + "we will default to the basic summary display mode.") + parser.add_argument('--silent', action='store_true', default=False, help="Display no output.") + parser.add_argument('--store-to', type=str, default='') + + args = parser.parse_args(line.split()) + if args.detailed: + mode = "detailed" + else: + mode = "basic" + + summary_res = self.client.statistics(args.language, True, mode) + summary_res.raise_for_status() + summary_res_json = summary_res.json() + if not args.silent: + print(json.dumps(summary_res_json, indent=2)) + + if args.store_to != '' and local_ns is not None: + local_ns[args.store_to] = summary_res_json @line_magic def graph_notebook_host(self, line): diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py index 41f3b202..97a17590 100644 --- a/src/graph_notebook/neptune/client.py +++ b/src/graph_notebook/neptune/client.py @@ -109,8 +109,9 @@ GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0'] GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1'] -STATISTICS_MODES = ["status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"] -STATISTICS_LANGUAGE_INPUTS = ["propertygraph", "pg", "gremlin", "sparql", "rdf"] +STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"] +SUMMARY_MODES = ["", "basic", "detailed"] +STATISTICS_LANGUAGE_INPUTS = ["propertygraph", "pg", "gremlin", "oc", "opencypher", "sparql", "rdf"] def is_allowed_neptune_host(hostname: str, host_allowlist: list): @@ -744,22 +745,30 @@ def _query_status(self, language: str, *, query_id: str = '', **kwargs) -> reque res = self._http_session.send(req, verify=self.ssl_verify) return res - def statistics(self, language: str, mode: str = '') -> requests.Response: + def statistics(self, language: str, summary: bool = False, mode: str = '') -> requests.Response: headers = { 'Accept': 'application/json' } - if language in ["pg", "gremlin"]: - language = "propertygraph" - elif language == "rdf": - language = "sparql" + if language in ["gremlin", "oc", "opencypher"]: + language = "pg" + elif language == "sparql": + language = "rdf" + url = f'{self._http_protocol}://{self.host}:{self.port}/{language}/statistics' - if mode in ['', 'status']: - req = self._prepare_request('GET', url, headers=headers) - elif mode == 'delete': - req = self._prepare_request('DELETE', url, headers=headers) + data = {'mode': mode} + + if summary: + summary_url = url + '/summary' + if not data['mode']: + data['mode'] = 'basic' + req = self._prepare_request('GET', summary_url, data=json.dumps(data), headers=headers) else: - data = {'mode': mode} - req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers) + if mode in ['', 'status']: + req = self._prepare_request('GET', url, headers=headers) + elif mode == 'delete': + req = self._prepare_request('DELETE', url, headers=headers) + else: + req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers) res = self._http_session.send(req) return res diff --git a/test/integration/iam/statistics/test_statistics_with_iam.py b/test/integration/iam/statistics/test_statistics_with_iam.py index 894c0eb7..919339df 100644 --- a/test/integration/iam/statistics/test_statistics_with_iam.py +++ b/test/integration/iam/statistics/test_statistics_with_iam.py @@ -36,7 +36,7 @@ def test_statistics_disable_autocompute(self, lang): expected = { "status": "200 OK" } - disable_res = self.client.statistics(lang, 'disableAutoCompute') + disable_res = self.client.statistics(lang, False, 'disableAutoCompute') assert disable_res.status_code == 200 disable_status = disable_res.json() self.assertEqual(disable_status, expected) @@ -52,7 +52,7 @@ def test_statistics_enable_autocompute(self, lang): expected = { "status": "200 OK" } - enable_res = self.client.statistics(lang, 'enableAutoCompute') + enable_res = self.client.statistics(lang, False, 'enableAutoCompute') assert enable_res.status_code == 200 enable_status = enable_res.json() self.assertEqual(enable_status, expected) @@ -82,7 +82,7 @@ def test_statistics_delete(self, lang): "statisticsId": -1 } } - res = self.client.statistics(lang, 'delete') + res = self.client.statistics(lang, False, 'delete') assert res.status_code == 200 statistics_status = res.json() self.assertEqual(statistics_status, expected) diff --git a/test/integration/iam/statistics/test_summary_with_iam.py b/test/integration/iam/statistics/test_summary_with_iam.py new file mode 100644 index 00000000..7ad00e6f --- /dev/null +++ b/test/integration/iam/statistics/test_summary_with_iam.py @@ -0,0 +1,82 @@ +""" +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +SPDX-License-Identifier: Apache-2.0 +""" +import pytest +from botocore.session import get_session + +from test.integration import IntegrationTest +from parameterized import parameterized + +lang_list = ["pg", "sparql"] +summary_detailed_fields_pg = ["nodeStructures", "edgeStructures"] +summary_detailed_fields_rdf = ["subjectStructures"] + + +class TestSummaryWithIAM(IntegrationTest): + def setUp(self) -> None: + super().setUp() + self.client = self.client_builder.with_iam(get_session()).build() + + @pytest.mark.neptune + @pytest.mark.iam + @parameterized.expand(lang_list) + def test_summary_default(self, lang): + expected_payload_fields = ['version', 'lastStatisticsComputationTime', 'graphSummary'] + res = self.client.statistics(lang, True) + assert res.status_code == 200 + summary_default = res.json() + self.assertEqual(summary_default['status'], '200 OK') + res_payload_fields = list(summary_default['payload'].keys()) + for x in expected_payload_fields: + self.assertIn(x, res_payload_fields) + + @pytest.mark.neptune + @pytest.mark.iam + def test_summary_basic_pg(self): + res = self.client.statistics("pg", True, "basic") + assert res.status_code == 200 + summary_pg_basic = res.json() + self.assertEqual(summary_pg_basic['status'], '200 OK') + summary_pg_fields = list(summary_pg_basic['payload']['graphSummary'].keys()) + + self.assertIn("numNodes", summary_pg_fields) + for x in summary_detailed_fields_pg: + self.assertNotIn(x, summary_pg_fields) + + @pytest.mark.neptune + @pytest.mark.iam + def test_summary_basic_rdf(self): + res = self.client.statistics("rdf", True, "basic") + assert res.status_code == 200 + summary_rdf_basic = res.json() + self.assertEqual(summary_rdf_basic['status'], '200 OK') + summary_rdf_fields = list(summary_rdf_basic['payload']['graphSummary'].keys()) + + self.assertIn("numDistinctSubjects", summary_rdf_fields) + for x in summary_detailed_fields_rdf: + self.assertNotIn(x, summary_rdf_fields) + + @pytest.mark.neptune + @pytest.mark.iam + def test_summary_detailed_pg(self): + res = self.client.statistics("pg", True, "detailed") + assert res.status_code == 200 + summary_pg_detailed = res.json() + self.assertEqual(summary_pg_detailed['status'], '200 OK') + summary_pg_fields = list(summary_pg_detailed['payload']['graphSummary'].keys()) + + for x in summary_detailed_fields_pg: + self.assertIn(x, summary_pg_fields) + + @pytest.mark.neptune + @pytest.mark.iam + def test_summary_detailed_rdf(self): + res = self.client.statistics("rdf", True, "detailed") + assert res.status_code == 200 + summary_rdf_detailed = res.json() + self.assertEqual(summary_rdf_detailed['status'], '200 OK') + summary_rdf_fields = list(summary_rdf_detailed['payload']['graphSummary'].keys()) + + for x in summary_detailed_fields_rdf: + self.assertIn(x, summary_rdf_fields)