Enhancements for Gremlin HTTP #624

Merged
4 commits merged on Jun 18, 2024
1 change: 1 addition & 0 deletions ChangeLog.md
@@ -8,6 +8,7 @@ Starting with v1.31.6, this file will contain a record of major features and updates

- Added `--connection-protocol` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/617))
- Added global Gremlin `connection_protocol` setting to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/621))
- Added various enhancements for `%%gremlin` HTTP connections to Neptune ([Link to PR](https://github.com/aws/graph-notebook/pull/624))
- Restored left alignment of numeric value columns in results table widget ([Link to PR](https://github.com/aws/graph-notebook/pull/620))

## Release 4.4.0 (June 10, 2024)
8 changes: 6 additions & 2 deletions src/graph_notebook/configuration/get_config.py
@@ -9,7 +9,7 @@
SparqlSection, GremlinSection, Neo4JSection
from graph_notebook.neptune.client import NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, \
DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, \
NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, NEPTUNE_DB_CONFIG_NAMES, NEPTUNE_ANALYTICS_CONFIG_NAMES
NEPTUNE_DB_SERVICE_NAME, DEFAULT_WS_PROTOCOL, DEFAULT_HTTP_PROTOCOL

neptune_params = ['neptune_service', 'auth_mode', 'load_from_s3_arn', 'aws_region']
neptune_gremlin_params = ['connection_protocol']
@@ -33,11 +33,15 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I
neptune_service = data['neptune_service'] if 'neptune_service' in data else NEPTUNE_DB_SERVICE_NAME
if 'gremlin' in data:
data['gremlin']['include_protocol'] = True
if 'connection_protocol' not in data['gremlin']:
data['gremlin']['connection_protocol'] = DEFAULT_WS_PROTOCOL \
if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL
gremlin_section = GremlinSection(**data['gremlin'])
if gremlin_section.to_dict()['traversal_source'] != 'g':
print('Ignoring custom traversal source, Amazon Neptune does not support this functionality.\n')
else:
gremlin_section = GremlinSection(include_protocol=True)
protocol = DEFAULT_WS_PROTOCOL if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL
gremlin_section = GremlinSection(include_protocol=True, connection_protocol=protocol)
if neo4j_section.to_dict()['username'] != DEFAULT_NEO4J_USERNAME \
or neo4j_section.to_dict()['password'] != DEFAULT_NEO4J_PASSWORD:
print('Ignoring Neo4J custom authentication, Amazon Neptune does not support this functionality.\n')
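The net effect of this change: when no `connection_protocol` is given in the `gremlin` section, a `neptune-db` host keeps WebSockets while a Neptune Analytics host defaults to HTTP. A minimal sketch of that fallback, with placeholder constant values standing in for the real strings imported from `graph_notebook.neptune.client`:

```python
# Sketch only: the constant values below are assumptions, not the library's actual strings.
NEPTUNE_DB_SERVICE_NAME = "neptune-db"       # assumed Neptune DB service identifier
DEFAULT_WS_PROTOCOL = "websockets"           # assumed default for Neptune DB
DEFAULT_HTTP_PROTOCOL = "http"               # assumed default for Neptune Analytics

def default_gremlin_protocol(neptune_service: str, gremlin_cfg: dict) -> str:
    """Mirrors the fallback logic added to get_config_from_dict."""
    if "connection_protocol" in gremlin_cfg:
        return gremlin_cfg["connection_protocol"]   # an explicit user setting always wins
    if neptune_service == NEPTUNE_DB_SERVICE_NAME:
        return DEFAULT_WS_PROTOCOL                  # Neptune DB stays on WebSockets
    return DEFAULT_HTTP_PROTOCOL                    # Neptune Analytics goes over HTTP

print(default_gremlin_protocol("neptune-db", {}))                                   # websockets
print(default_gremlin_protocol("neptune-graph", {"connection_protocol": "http"}))   # http
```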
21 changes: 13 additions & 8 deletions src/graph_notebook/magics/graph_magic.py
@@ -44,15 +44,16 @@
neptune_db_only, neptune_graph_only
from graph_notebook.magics.ml import neptune_ml_magic_handler, generate_neptune_ml_parser
from graph_notebook.magics.streams import StreamViewer
from graph_notebook.neptune.client import ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
from graph_notebook.neptune.client import (ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
DB_LOAD_TYPES, ANALYTICS_LOAD_TYPES, VALID_BULK_FORMATS, VALID_INCREMENTAL_FORMATS, \
FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, \
OPENCYPHER_STATUS_STATE_MODES, normalize_service_name, GRAPH_PG_INFO_METRICS, \
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
normalize_service_name, GRAPH_PG_INFO_METRICS, \
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name)
from graph_notebook.network import SPARQLNetwork
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
@@ -534,7 +535,7 @@ def stream_viewer(self, line):

language = args.language
limit = args.limit
uri = self.client.get_uri_with_port()
uri = self.client.get_uri(include_port=True)
viewer = StreamViewer(self.client, uri, language, limit=limit)
viewer.show()

@@ -1034,8 +1035,9 @@ def gremlin(self, line, cell, local_ns: dict = None):
f'If not specified, defaults to the value of the gremlin.connection_protocol field '
f'in %graph_notebook_config. Please note that this option has no effect on the '
f'Profile and Explain modes, which must use HTTP.')
parser.add_argument('--explain-type', type=str.lower, default='',
help='Explain mode to use when using the explain query mode.')
parser.add_argument('--explain-type', type=str.lower, default='dynamic',
help=f'Explain mode to use when using the explain query mode. '
f'Accepted values: {GREMLIN_EXPLAIN_MODES}')
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
parser.add_argument('-g', '--group-by', type=str, default='',
help='Property used to group nodes (e.g. code, T.region) default is T.label')
@@ -1074,6 +1076,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
'TinkerPop driver "Serializers" enum values. Default is GRAPHSON_V3_UNTYPED')
parser.add_argument('--profile-indexOps', action='store_true', default=False,
help='Show a detailed report of all index operations.')
parser.add_argument('--profile-debug', action='store_true', default=False,
help='Enable debug mode.')
parser.add_argument('--profile-misc-args', type=str, default='{}',
help='Additional profile options, passed in as a map.')
parser.add_argument('-sp', '--stop-physics', action='store_true', default=False,
@@ -1154,7 +1158,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
profile_args = {"profile.results": args.profile_no_results,
"profile.chop": args.profile_chop,
"profile.serializer": serializer,
"profile.indexOps": args.profile_indexOps}
"profile.indexOps": args.profile_indexOps,
"profile.debug": args.profile_debug}
try:
profile_misc_args_dict = json.loads(args.profile_misc_args)
profile_args.update(profile_misc_args_dict)
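For reference, the new and updated flags could be exercised from a notebook roughly as follows (a usage sketch; the airport query and the `explain`/`profile` positional mode arguments are illustrative, and `--profile-debug` only applies in profile mode):

```
%%gremlin explain --explain-type details
g.V().has('code', 'ATL').out().limit(5)
```

```
%%gremlin profile --profile-debug --profile-indexOps
g.V().has('code', 'ATL').out().limit(5)
```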
7 changes: 5 additions & 2 deletions src/graph_notebook/magics/metadata.py
@@ -204,8 +204,11 @@ def build_gremlin_metadata_from_query(query_type: str, results: any, res: Respon
if query_type == 'explain':
gremlin_metadata = create_propertygraph_metadata_obj('explain')
gremlin_metadata.set_request_metrics(res)
gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1))
.replace(".", '').replace(",", '')))
try:
gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1))
.replace(".", '').replace(",", '')))
except AttributeError:
pass
return gremlin_metadata
elif query_type == 'profile':
gremlin_metadata = create_propertygraph_metadata_obj('profile')
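The new guard exists because `re.search` returns `None` when the explain output has no `# of predicates` line, and calling `.group(1)` on `None` raises `AttributeError`. A small standalone illustration:

```python
import re

results = "Gremlin explain output without a predicate summary\n"  # illustrative output
match = re.search(r'# of predicates: (.*?)\n', results)
try:
    predicates = int(match.group(1).replace(".", "").replace(",", ""))
except AttributeError:
    predicates = None   # metric is simply omitted, mirroring the new try/except
print(predicates)       # -> None
```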
42 changes: 33 additions & 9 deletions src/graph_notebook/neptune/client.py
@@ -137,6 +137,7 @@

SPARQL_EXPLAIN_MODES = ['dynamic', 'static', 'details']
OPENCYPHER_EXPLAIN_MODES = ['dynamic', 'static', 'details']
GREMLIN_EXPLAIN_MODES = ['dynamic', 'static', 'details']
OPENCYPHER_PLAN_CACHE_MODES = ['auto', 'enabled', 'disabled']
OPENCYPHER_DEFAULT_TIMEOUT = 120000
OPENCYPHER_STATUS_STATE_MODES = ['ALL', 'RUNNING', 'WAITING', 'CANCELLING']
@@ -257,7 +258,7 @@ def is_neptune_domain(self):
def is_analytics_domain(self):
return self.service == NEPTUNE_ANALYTICS_SERVICE_NAME

def get_uri_with_port(self, use_websocket=False, use_proxy=False):
def get_uri(self, use_websocket=False, use_proxy=False, include_port=True):
if use_websocket is True:
protocol = self._ws_protocol
else:
Expand All @@ -270,7 +271,9 @@ def get_uri_with_port(self, use_websocket=False, use_proxy=False):
uri_host = self.target_host
uri_port = self.target_port

uri = f'{protocol}://{uri_host}:{uri_port}'
uri = f'{protocol}://{uri_host}'
if include_port:
uri += f':{uri_port}'
return uri

def get_graph_id(self):
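The rename of `get_uri_with_port` to `get_uri` adds an `include_port` flag so callers can build port-less endpoints (used later for the Analytics `/queries` path). A simplified, standalone sketch of the behaviour, with a hypothetical host name:

```python
def get_uri(protocol: str, host: str, port: int, include_port: bool = True) -> str:
    """Simplified stand-in for Client.get_uri, for illustration only."""
    uri = f"{protocol}://{host}"
    if include_port:
        uri += f":{port}"
    return uri

print(get_uri("https", "my-neptune-host.example.com", 8182))                      # with port
print(get_uri("https", "my-neptune-host.example.com", 8182, include_port=False))  # without port
```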
@@ -347,9 +350,9 @@ def sparql_cancel(self, query_id: str, silent: bool = False):
def get_gremlin_connection(self, transport_kwargs) -> client.Client:
nest_asyncio.apply()

ws_url = f'{self.get_uri_with_port(use_websocket=True, use_proxy=False)}/gremlin'
ws_url = f'{self.get_uri(use_websocket=True, use_proxy=False)}/gremlin'
if self.proxy_host != '':
proxy_http_url = f'{self.get_uri_with_port(use_websocket=False, use_proxy=True)}/gremlin'
proxy_http_url = f'{self.get_uri(use_websocket=False, use_proxy=True)}/gremlin'
transport_factory_args = lambda: AiohttpTransport(call_from_event_loop=True, proxy=proxy_http_url,
**transport_kwargs)
request = self._prepare_request('GET', proxy_http_url)
@@ -387,9 +390,17 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response:
if headers is None:
headers = {}

data = {}
use_proxy = True if self.proxy_host != '' else False
uri = f'{self.get_uri_with_port(use_websocket=False, use_proxy=use_proxy)}/gremlin'
data = {'gremlin': query}
if self.is_analytics_domain():
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy, include_port=False)}/queries'
data['language'] = 'gremlin'
data['gremlin'] = query
headers['content-type'] = 'application/json'
else:
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy)}/gremlin'
data['gremlin'] = query

req = self._prepare_request('POST', uri, data=json.dumps(data), headers=headers)
res = self._http_session.send(req, verify=self.ssl_verify)
return res
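Based on the branch above, the outgoing request now differs by service: Neptune Analytics gets a port-less `/queries` endpoint with a JSON body carrying a `language` field, while Neptune DB keeps the `host:port/gremlin` endpoint with a Gremlin-only body. A rough sketch of the two shapes (the base URI, host, and port are illustrative):

```python
import json

def build_gremlin_http_request(base_uri: str, query: str, is_analytics: bool):
    """Sketch of the URI/payload shapes produced by gremlin_http_query."""
    headers = {}
    if is_analytics:
        uri = f"{base_uri}/queries"                       # no port, Analytics query endpoint
        data = {"language": "gremlin", "gremlin": query}  # language tag included here
        headers["content-type"] = "application/json"
    else:
        uri = f"{base_uri}:8182/gremlin"                  # assumed default Neptune DB port
        data = {"gremlin": query}
    return uri, json.dumps(data), headers

print(build_gremlin_http_request("https://my-neptune-host.example.com",
                                 "g.V().limit(1)", is_analytics=True))
```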
@@ -412,12 +423,25 @@ def gremlin_profile(self, query: str, args={}) -> requests.Response:
return self._gremlin_query_plan(query=query, plan_type='profile', args=args)

def _gremlin_query_plan(self, query: str, plan_type: str, args: dict, ) -> requests.Response:
url = f'{self._http_protocol}://{self.host}:{self.port}/gremlin/{plan_type}'
data = {'gremlin': query}
data = {}
headers = {}
url = f'{self._http_protocol}://{self.host}'
if self.is_analytics_domain():
url += '/queries'
data['gremlin'] = query
data['language'] = 'gremlin'
headers['content-type'] = 'application/json'
if plan_type == 'explain':
data['explain.mode'] = args.pop('explain.mode')
elif plan_type == 'profile':
data['profile.debug'] = args.pop('profile.debug')
else:
url += f':{self.port}/gremlin/{plan_type}'
data['gremlin'] = query
if args:
for param, value in args.items():
data[param] = value
req = self._prepare_request('POST', url, data=json.dumps(data))
req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers)
res = self._http_session.send(req, verify=self.ssl_verify)
return res

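The same split applies to explain and profile plans: on Analytics the query goes to `/queries` with only `explain.mode` or `profile.debug` lifted out of the argument map, while on Neptune DB it still targets `host:port/gremlin/<plan_type>`. A condensed sketch of that argument handling (simplified; not an exact reproduction of `_gremlin_query_plan`):

```python
def build_plan_request(host: str, port: int, plan_type: str, query: str,
                       args: dict, is_analytics: bool):
    """Simplified sketch of how _gremlin_query_plan assembles its request."""
    data, headers = {}, {}
    url = f"https://{host}"
    if is_analytics:
        url += "/queries"
        data.update({"gremlin": query, "language": "gremlin"})
        headers["content-type"] = "application/json"
        if plan_type == "explain":
            data["explain.mode"] = args.pop("explain.mode")
        elif plan_type == "profile":
            data["profile.debug"] = args.pop("profile.debug")
    else:
        url += f":{port}/gremlin/{plan_type}"
        data["gremlin"] = query
    data.update(args)   # any remaining options (e.g. profile.results, profile.chop) ride along
    return url, data, headers

url, data, _ = build_plan_request("my-neptune-host.example.com", 8182, "profile",
                                  "g.V().limit(1)", {"profile.debug": True}, is_analytics=True)
print(url, data)
```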