diff --git a/ChangeLog.md b/ChangeLog.md index 233c54f3..b26fc6b2 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,6 +5,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd ## Upcoming - Added `%reset_graph` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/610)) - Added `%get_graph` line magic and enabled `%status` for Neptune Analytics ([Link to PR](https://github.com/aws/graph-notebook/pull/611)) +- Upgraded to Gremlin-Python 3.7 ([Link to PR](https://github.com/aws/graph-notebook/pull/597)) ## Release 4.3.1 (June 3, 2024) diff --git a/requirements.txt b/requirements.txt index a7a4529b..c4a54e26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,8 +8,8 @@ jupyterlab_widgets>=1.0.0,<3.0.0 nbclient<=0.7.0 jupyter-contrib-nbextensions<=0.7.0 widgetsnbextension<=3.6.1 -gremlinpython>=3.5.1,<=3.6.2 -requests>=2.32.0,<=2.32.2 +gremlinpython>=3.5.1,<=3.7.2 +requests>=2.27.0,<=2.31.0 ipython>=7.16.1,<=8.10.0 ipykernel==5.3.4 ipyfilechooser==0.6.0 diff --git a/setup.py b/setup.py index 31a3c73d..37cb08ae 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def get_version(): package_dir={'': 'src'}, include_package_data=True, install_requires=[ - 'gremlinpython>=3.5.1,<=3.6.2', + 'gremlinpython>=3.5.1,<=3.7.2', 'SPARQLWrapper==2.0.0', 'requests>=2.32.0,<=2.32.2', 'ipywidgets==7.7.2', diff --git a/src/graph_notebook/configuration/generate_config.py b/src/graph_notebook/configuration/generate_config.py index 204f38a5..e868a4d3 100644 --- a/src/graph_notebook/configuration/generate_config.py +++ b/src/graph_notebook/configuration/generate_config.py @@ -66,6 +66,7 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str traversal_source = DEFAULT_GREMLIN_TRAVERSAL_SOURCE serializer_lower = message_serializer.lower() + # TODO: Update with untyped serializers once supported in GremlinPython if serializer_lower == '': message_serializer = DEFAULT_GREMLIN_SERIALIZER elif serializer_lower in GRAPHSONV3_VARIANTS: diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index c6cd10d8..30c38e75 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -135,12 +135,16 @@ SEED_NO_DATASETS_FOUND_MSG = "(No datasets available)" SEED_WIDGET_STYLE = {'description_width': '95px'} +# Tokens as currently defined in TinkerPop 3.7: https://github.com/apache/tinkerpop/blob/3.7-dev/gremlin-util/src/main/java/org/apache/tinkerpop/gremlin/util/ser/SerTokens.java serializers_map = { "MIME_JSON": "application/json", - "GRAPHSON_V2D0": "application/vnd.gremlin-v2.0+json", - "GRAPHSON_V3D0": "application/vnd.gremlin-v3.0+json", - "GRYO_V3D0": "application/vnd.gremlin-v3.0+gryo", - "GRAPHBINARY_V1D0": "application/vnd.graphbinary-v1.0" + "GRAPHSON_V1": "application/vnd.gremlin-v1.0+json", + "GRAPHSON_V1_UNTYPED": "application/vnd.gremlin-v1.0+json;types=false", + "GRAPHSON_V2": "application/vnd.gremlin-v2.0+json", + "GRAPHSON_V2_UNTYPED": "application/vnd.gremlin-v2.0+json;types=false", + "GRAPHSON_V3": "application/vnd.gremlin-v3.0+json", + "GRAPHSON_V3_UNTYPED": "application/vnd.gremlin-v3.0+json;types=false", + "GRAPHBINARY_V1": "application/vnd.graphbinary-v1.0" } DEFAULT_NAMEDGRAPH_URI = "http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph" @@ -1057,9 +1061,9 @@ def gremlin(self, line, cell, local_ns: dict = None): 'the profile report by default.') parser.add_argument('--profile-chop', type=int, default=250, help='Property to specify max length of profile results string. Default is 250') - parser.add_argument('--profile-serializer', type=str, default='application/json', + parser.add_argument('--profile-serializer', type=str, default='GRAPHSON_V3_UNTYPED', help='Specify how to serialize results. Allowed values are any of the valid MIME type or ' - 'TinkerPop driver "Serializers" enum values. Default is application/json') + 'TinkerPop driver "Serializers" enum values. Default is GRAPHSON_V3_UNTYPED') parser.add_argument('--profile-indexOps', action='store_true', default=False, help='Show a detailed report of all index operations.') parser.add_argument('--profile-misc-args', type=str, default='{}', diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py index 91e24074..f8c3c2dd 100644 --- a/src/graph_notebook/neptune/client.py +++ b/src/graph_notebook/neptune/client.py @@ -118,9 +118,9 @@ false_str_variants = [False, 'False', 'false', 'FALSE'] -GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0'] -GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0'] -GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1'] +GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0', 'graphsonmessageserializerv3'] +GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0', 'graphsonmessageserializerv2'] +GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1', 'graphbinarymessageserializerv1'] STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"] SUMMARY_MODES = ["", "basic", "detailed"] diff --git a/test/unit/graph_magic/gremlin_profile_large_results_predicates.txt b/test/unit/graph_magic/gremlin_profile_large_results_predicates.txt index 7503a2f8..50363948 100644 --- a/test/unit/graph_magic/gremlin_profile_large_results_predicates.txt +++ b/test/unit/graph_magic/gremlin_profile_large_results_predicates.txt @@ -8,5 +8,5 @@ Results ======= Count: 999.999 Output: [v[3], v[3600], v[3614], v[4], v[5], v[6], v[7], v[8], v[9], v[10], v[11], v[12], v[47], v[49], v[136], v[13], v[15], v[16], v[17], v[18], v[389], v[20], v[21], v[22], v[23], v[24], v[25], v[26], v[27], v[28], v[416], v[29], v[30], v[430], v[31], v[9... -Response serializer: GRYO_V3D0 +Response serializer: application/vnd.gremlin-v3.0+json Response size (bytes): 23566 \ No newline at end of file diff --git a/test/unit/graph_magic/gremlin_profile_sample_response.txt b/test/unit/graph_magic/gremlin_profile_sample_response.txt index 9e6f39c0..fc93d929 100644 --- a/test/unit/graph_magic/gremlin_profile_sample_response.txt +++ b/test/unit/graph_magic/gremlin_profile_sample_response.txt @@ -4,11 +4,12 @@ Query String ================== -g.V().hasLabel("airport").has("code", "AUS").emit().repeat(in().simplePath()).times(2).limit(100) +g.V().has('code','LHR').outE().inV().path().by(valueMap(true)).limit(5) + Original Traversal ================== -[GraphStep(vertex,[]), HasStep([~label.eq(airport), code.eq(AUS)]), RepeatStep(emit(true),[VertexStep(IN,vertex), PathFilterStep(simple), RepeatEndStep],until(loops(2))), RangeGlobalStep(0,100)] +[GraphStep(vertex,[]), HasStep([code.eq(LHR)]), VertexStep(OUT,edge), EdgeVertexStep(IN), PathStep([[PropertyMapStep(value)]]), RangeGlobalStep(0,5)] Optimized Traversal =================== @@ -16,88 +17,93 @@ Neptune steps: [ NeptuneGraphQueryStep(Vertex) { JoinGroupNode { - PatternNode[(?1, , "AUS", ?) . project ?1 .], {estimatedCardinality=1, indexTime=84, hashJoin=true, joinTime=3, actualTotalOutput=1} - PatternNode[(?1, <~label>, ?2=, <~>) . project ask .], {estimatedCardinality=3374, indexTime=29, hashJoin=true, joinTime=0, actualTotalOutput=61} - RepeatNode { - Repeat { - PatternNode[(?3, ?5, ?1, ?6) . project ?1,?3 . IsEdgeIdFilter(?6) . SimplePathFilter(?1, ?3)) .], {hashJoin=true, estimatedCardinality=50148, indexTime=0, joinTime=3} - } - Emit { - Filter(true) - } - LoopsCondition { - LoopsFilter([?1, ?3],eq(2)) + PatternNode[VP(?1, , "LHR", <~>) . project ?1 .], {estimatedCardinality=1, expectedTotalOutput=1, indexTime=0, joinTime=0, numSearches=1, actualTotalOutput=1} + PatternNode[EL(?1, ?5, ?3, ?6) . project ?1,?6,?3 . IsEdgeIdFilter(?6) .], {estimatedCardinality=INFINITY, expectedTotalInput=1, indexTime=0, joinTime=0, numSearches=1} + }, finishers=[limit(5)], {path=[Vertex(?1):GraphStep, Edge(?6,?1,?3):VertexStep, Vertex(?3):EdgeVertexStep], joinStats=true, optimizationTime=2, maxVarId=15, executionTime=14} + }, + NeptunePathStep { + NeptunePropertyMapStep { + JoinGroupNode { + PatternNode[VP(?1, ?8, ?9, <~>) .] + }, {initialValues={?1=null, ?3=null, ?6=null}} + }, + NeptunePropertyMapStep { + JoinGroupNode { + UnionNode { + PatternNode[EP(?6, ?10, ?11, <~>) .], {estimatedCardinality=63580} + PatternNode[EL(?, ?12, ?, ?6) .], {estimatedCardinality=INFINITY} } - }, annotations={repeatMode=BFS, emitFirst=true, untilFirst=false, leftVar=?1, rightVar=?3} - }, finishers=[limit(100)], annotations={path=[Vertex(?1):GraphStep, Repeat[Vertex(?3):VertexStep]], joinStats=true, optimizationTime=495, maxVarId=7, executionTime=323} + }, {initialValues={?1=null, ?3=null, ?6=null}} + }, + NeptunePropertyMapStep { + JoinGroupNode { + PatternNode[VP(?3, ?13, ?14, <~>) .] + }, {initialValues={?1=null, ?3=null, ?6=null}} + } }, NeptuneTraverserConverterStep ] + Physical Pipeline ================= NeptuneGraphQueryStep |-- StartOp - |-- JoinGroupOp - |-- SpoolerOp(100) - |-- DynamicJoinOp(PatternNode[(?1, , "AUS", ?) . project ?1 .], {estimatedCardinality=1, indexTime=84, hashJoin=true}) - |-- SpoolerOp(100) - |-- DynamicJoinOp(PatternNode[(?1, <~label>, ?2=, <~>) . project ask .], {estimatedCardinality=3374, indexTime=29, hashJoin=true}) - |-- RepeatOp - |-- (Iteration 0) [visited=1, output=1 (until=0, emit=1), next=1] - |-- BindingSetQueue (Iteration 1) [visited=61, output=61 (until=0, emit=61), next=61] - |-- SpoolerOp(100) - |-- DynamicJoinOp(PatternNode[(?3, ?5, ?1, ?6) . project ?1,?3 . IsEdgeIdFilter(?6) . SimplePathFilter(?1, ?3)) .], {hashJoin=true, estimatedCardinality=50148, indexTime=0}) - |-- BindingSetQueue (Iteration 2) [visited=38, output=38 (until=38, emit=0), next=0] - |-- SpoolerOp(100) - |-- DynamicJoinOp(PatternNode[(?3, ?5, ?1, ?6) . project ?1,?3 . IsEdgeIdFilter(?6) . SimplePathFilter(?1, ?3)) .], {hashJoin=true, estimatedCardinality=50148, indexTime=0}) - |-- LimitOp(100) + |-- JoinGroupOp@2f500db7 + |-- DynamicJoinOp@78b965ae(PipelineJoinOp(PatternNode[VP(?1, , "LHR", <~>) . project ?1 .], {estimatedCardinality=1, expectedTotalOutput=1})) + |-- SpoolerOp(100, @78b965ae, null) + |-- DynamicJoinOp@67012ec9(PipelineJoinOp(PatternNode[EL(?1, ?5, ?3, ?6) . project ?1,?6,?3 . IsEdgeIdFilter(?6) .], {estimatedCardinality=INFINITY, expectedTotalInput=1})) + |-- LimitOp(5) + +NeptunePathStep + |-- BindingSetQueue + |-- JoinGroupOp@6a996745 + |-- DynamicJoinOp@417c3e4f(PipelineJoinOp(PatternNode[VP(?1, ?8, ?9, <~>) .])) + + |-- BindingSetQueue + |-- JoinGroupOp@2b0c033 + |-- UnionOp@44764c1 + |-- MultiplexerOp + + |-- BindingSetQueue + |-- JoinGroupOp@60bf88a9 + |-- DynamicJoinOp@32903d93(PipelineJoinOp(PatternNode[VP(?3, ?13, ?14, <~>) .])) Runtime (ms) ============ -Query Execution: 392.686 -Serialization: 2636.380 +Query Execution: 18.669 +Serialization: 15.464 Traversal Metrics ================= Step Count Traversers Time (ms) % Dur ------------------------------------------------------------------------------------------------------------- -NeptuneGraphQueryStep(Vertex) 100 100 314.162 82.78 -NeptuneTraverserConverterStep 100 100 65.333 17.22 - >TOTAL - - 379.495 - - -Repeat Metrics -============== -Iteration Visited Output Until Emit Next ------------------------------------------------------- - 0 1 1 0 1 1 - 1 61 61 0 61 61 - 2 38 38 38 0 0 ------------------------------------------------------- - 100 100 38 62 62 +NeptuneGraphQueryStep(Vertex) 5 5 2.152 20.56 +NeptunePathStep([[NeptunePropertyMapStep], [Nep... 5 5 4.317 41.25 +NeptuneTraverserConverterStep 5 5 3.997 38.19 + >TOTAL - - 10.466 - Predicates ========== -# of predicates: 16 - -WARNING: reverse traversal with no edge label(s) - .in() / .both() may impact query performance +# of predicates: 18 Results ======= -Count: 100 -Output: [v[3], v[3600], v[3614], v[4], v[5], v[6], v[7], v[8], v[9], v[10], v[11], v[12], v[47], v[49], v[136], v[13], v[15], v[16], v[17], v[18], v[389], v[20], v[21], v[22], v[23], v[24], v[25], v[26], v[27], v[28], v[416], v[29], v[30], v[430], v[31], v[9... -Response serializer: GRYO_V3D0 -Response size (bytes): 23566 +Count: 5 +Output: [path[{country=[UK], code=[LHR], longest=[12799], city=[London], lon=[-0.461941003799], type=[airport], label=airport, elev=[83], icao=[EGLL], id=49, runways=[2], region=[GB-ENG], lat=[51.4706001282], desc=[London Heathrow]}, {dist=3533, id=9144, lab... +Response serializer: application/vnd.gremlin-v3.0+json +Response size (bytes): 10162 + Index Operations ================ Query execution: - # of statement index ops: 3 - # of unique statement index ops: 3 + # of statement index ops: 18 + # of unique statement index ops: 18 Duplication ratio: 1.0 # of terms materialized: 0 Serialization: - # of statement index ops: 200 - # of unique statement index ops: 140 - Duplication ratio: 1.43 - # of terms materialized: 393 + # of statement index ops: 18 + # of unique statement index ops: 18 + Duplication ratio: 1.0 + # of terms materialized: 0 diff --git a/test/unit/graph_magic/metadata_gremlin_profile.py b/test/unit/graph_magic/metadata_gremlin_profile.py index 68f06c38..7945fa77 100644 --- a/test/unit/graph_magic/metadata_gremlin_profile.py +++ b/test/unit/graph_magic/metadata_gremlin_profile.py @@ -11,20 +11,20 @@ class TestMetadataClassFunctions(unittest.TestCase): def test_gremlin_profile_metadata_func(self): - time_expected = 392.686 - predicates_expected = 16 - results_num_expected = 100 - serialization_expected = 2636.380 - serializer_type_expected = "GRYO_V3D0" - results_size_expected = 23566 - query_total_index_ops_expected = 3 - query_unique_index_ops_expected = 3 + time_expected = 18.669 + predicates_expected = 18 + results_num_expected = 5 + serialization_expected = 15.464 + serializer_type_expected = "application/vnd.gremlin-v3.0+json" + results_size_expected = 10162 + query_total_index_ops_expected = 18 + query_unique_index_ops_expected = 18 query_duplication_ratio_expected = 1 query_terms_materialized_expected = 0 - seri_total_index_ops_expected = 200 - seri_unique_index_ops_expected = 140 - seri_duplication_ratio_expected = 1.43 - seri_terms_materialized_expected = 393 + seri_total_index_ops_expected = 18 + seri_unique_index_ops_expected = 18 + seri_duplication_ratio_expected = 1.0 + seri_terms_materialized_expected = 0 gremlin_metadata = Metadata() with open('gremlin_profile_sample_response.txt', 'r') as profile_file: