Gremlin python


Wolfgang Fahl

Loading the air-routes example[edit]

Kelvin Lawrence has a nice example in his tutorial. The sample file https://github.com/krlawrence/graph/blob/master/sample-data/air-routes-small.graphml is also available for this tutorial.

from tutorial import remote
import os

# initialize a remote traversal
g = remote.RemoteTraversal().g()

# test loading a graph
def test_loadGraph():
    """Reload the air-routes sample graph and verify its vertex count.

    Uses the module-level traversal source ``g``: all existing vertices are
    dropped, the GraphML file is read via ``g.io(...)`` and the test then
    asserts that exactly 47 vertices are present.
    """
    sample_name = "air-routes-small.xml"
    # hand over an absolute path (resolved against the current working directory)
    absolute_path = os.path.abspath(sample_name)
    # start from an empty graph
    wipe = g.V().drop()
    wipe.iterate()
    # let the io step read the air routes example
    reader = g.io(absolute_path).read()
    reader.iterate()
    vertex_count = g.V().count().next()
    print("%s has %d vertices" % (sample_name, vertex_count))
    assert vertex_count == 47

test_loadGraph()

Saving a graph[edit]

Let's create a graph containing a single node for the fish named Wanda and save it.

# test saving a graph
def test_saveGraph():
    """Write a one-vertex graph to a GraphML file and check that it exists.

    Uses the module-level traversal source ``g``: the graph is emptied, a
    single ``Fish`` vertex named ``Wanda`` is added and the graph is written
    via ``g.io(...)`` to a fixed path below ``/tmp``.
    """
    target_path = "/tmp/A-Fish-Named-Wanda.xml"
    # start from an empty graph
    wipe = g.V().drop()
    wipe.iterate()
    # the only content: one vertex labelled Fish with a name property
    wanda = g.addV("Fish").property("name", "Wanda")
    wanda.iterate()
    writer = g.io(target_path).write()
    writer.iterate()
    print("wrote graph to %s" % (target_path))
    # the written file must be visible on the local file system
    assert os.path.isfile(target_path)

Creating a graphical representation of a graph[edit]

A simple way to visualize your graphs is using graphviz. There is a graphviz python module with documentation.

Example Graphviz Usage[edit]

see https://github.com/WolfgangFahl/gremlin-python-tutorial/blob/master/test_005_graphviz.py

# see https://github.com/WolfgangFahl/gremlin-python-tutorial/blob/master/test_005_graphviz.py
from tutorial import remote
from graphviz import Digraph
import os.path
from gremlin_python.process.traversal import T

# initialize a remote traversal
g = remote.RemoteTraversal().g()

# test creating a graphviz graph from the tinkerpop graph
def test_createGraphvizGraph():
    """Draw the TinkerPop "modern" example graph with graphviz.

    Reloads ``tinkerpop-modern.xml``, adds one graphviz node per vertex and
    one graphviz edge per edge (read through the module-level traversal
    source ``g``), prints the dot source and renders it to
    ``/tmp/modern.gv.pdf``, whose existence is asserted at the end.
    """
    # reload the modern example so the graph content is known
    loader = remote.RemoteTraversal()
    loader.load("tinkerpop-modern.xml")
    diagram = Digraph(comment='Modern')
    # each vertex arrives as a dict that also carries T.id and T.label
    for vertex in g.V().valueMap(True).toList():
        # add "print(vertex)" here when debugging
        vertex_id = vertex[T.id]
        # the name property is expected on every vertex
        label_parts = [
            r"%s\n%s\nname=%s" % (vertex_id, vertex[T.label], vertex["name"][0])
        ]
        # age is optional and only shown when present
        if "age" in vertex:
            label_parts.append(r"\nage=%s" % (vertex["age"][0]))
        diagram.node("node%d" % (vertex_id), "".join(label_parts))
    # every edge needs a second query to fetch its properties
    for edge in g.E():
        edge_details = g.E(edge.id).valueMap(True).next()
        # add "print(edge, edge_details)" here when debugging
        edge_label = r"%s\n%s\nweight=%s" % (
            edge.id, edge.label, edge_details["weight"])
        source_node = "node%d" % (edge.outV.id)
        target_node = "node%d" % (edge.inV.id)
        diagram.edge(source_node, target_node, label=edge_label)
    # styling, see http://www.graphviz.org/doc/info/attrs.html
    diagram.edge_attr.update(arrowsize='2', penwidth='2')
    diagram.node_attr.update(style='filled', fillcolor="#A8D0E4")
    # show the generated dot source
    print(diagram.source)
    # render without opening a viewer; the assertion below expects a pdf
    diagram.render('/tmp/modern.gv', view=False)
    assert os.path.isfile('/tmp/modern.gv.pdf')

# call the test
test_createGraphvizGraph()

Example Graphviz Usage[edit]

see https://github.com/WolfgangFahl/gremlin-python-tutorial/blob/master/test_005_graphviz.py

# see https://github.com/WolfgangFahl/gremlin-python-tutorial/blob/master/test_005_graphviz.py
from tutorial import remote
from graphviz import Digraph
import os.path
from gremlin_python.process.traversal import T

# initialize a remote traversal
g = remote.RemoteTraversal().g()

# test creating a graphviz graph from the tinkerpop graph
def test_createGraphvizGraph():
    """Draw the TinkerPop "modern" example graph with graphviz.

    Reloads ``tinkerpop-modern.xml``, adds one graphviz node per vertex and
    one graphviz edge per edge (read through the module-level traversal
    source ``g``), prints the dot source and renders it to
    ``/tmp/modern.gv.pdf``, whose existence is asserted at the end.
    """
    # reload the modern example so the graph content is known
    loader = remote.RemoteTraversal()
    loader.load("tinkerpop-modern.xml")
    diagram = Digraph(comment='Modern')
    # each vertex arrives as a dict that also carries T.id and T.label
    for vertex in g.V().valueMap(True).toList():
        # add "print(vertex)" here when debugging
        vertex_id = vertex[T.id]
        # the name property is expected on every vertex
        label_parts = [
            r"%s\n%s\nname=%s" % (vertex_id, vertex[T.label], vertex["name"][0])
        ]
        # age is optional and only shown when present
        if "age" in vertex:
            label_parts.append(r"\nage=%s" % (vertex["age"][0]))
        diagram.node("node%d" % (vertex_id), "".join(label_parts))
    # every edge needs a second query to fetch its properties
    for edge in g.E():
        edge_details = g.E(edge.id).valueMap(True).next()
        # add "print(edge, edge_details)" here when debugging
        edge_label = r"%s\n%s\nweight=%s" % (
            edge.id, edge.label, edge_details["weight"])
        source_node = "node%d" % (edge.outV.id)
        target_node = "node%d" % (edge.inV.id)
        diagram.edge(source_node, target_node, label=edge_label)
    # styling, see http://www.graphviz.org/doc/info/attrs.html
    diagram.edge_attr.update(arrowsize='2', penwidth='2')
    diagram.node_attr.update(style='filled', fillcolor="#A8D0E4")
    # show the generated dot source
    print(diagram.source)
    # render without opening a viewer; the assertion below expects a pdf
    diagram.render('/tmp/modern.gv', view=False)
    assert os.path.isfile('/tmp/modern.gv.pdf')

# call the test
test_createGraphvizGraph()

Resulting graphviz dot source[edit]

// Modern
digraph {
	node [fillcolor="#A8D0E4" style=filled]
	edge [arrowsize=2 penwidth=2]
	node1 [label="1\nperson\nname=marko\nage=29"]
	node2 [label="2\nperson\nname=vadas\nage=27"]
	node3 [label="3\nsoftware\nname=lop"]
	node4 [label="4\nperson\nname=josh\nage=32"]
	node5 [label="5\nsoftware\nname=ripple"]
	node6 [label="6\nperson\nname=peter\nage=35"]
	node1 -> node2 [label="7\nknows\nweight=0.5"]
	node1 -> node4 [label="8\nknows\nweight=1.0"]
	node1 -> node3 [label="9\ncreated\nweight=0.4"]
	node4 -> node5 [label="10\ncreated\nweight=1.0"]
	node4 -> node3 [label="11\ncreated\nweight=0.4"]
	node6 -> node3 [label="12\ncreated\nweight=0.2"]
}

Resulting pdf file[edit]

If you set "view=True" the pdf display will be directly initiated from the python script. load PDF

Connecting to Gremlin enabled graph databases[edit]

According to the Gremlin Wiki page there are a few different graph databases out there that support Gremlin/Apache Tinkerpop. We'll try to connect to a few of these using gremlin-python.

  • ❌ means we didn't get it to work even after trying
  • ❓ we didn't test it yet
  • ✅ means we got it working

Amazon Neptune ❓[edit]

Blazegraph ❓[edit]

Cosmos ❓[edit]

DataStax ❌[edit]

Blazegraph ❓[edit]

DataStax ❌[edit]

Trial[edit]

# https://hub.docker.com/_/datastax
image=datastax/dse-server:6.7.2
docker pull $image
docker run --name datastax  -e DS_LICENSE=accept -p 8182:8182 $image

JanusGraph ✅[edit]

3. Trial[edit]

docker run -it -p 8182:8182 --mount src=<path to graphdata>,target=/graphdata,type=bind janusgraph/janusgraph

see https://stackoverflow.com/a/60964495/1497139

With a bash shell you can check which files are available

    docker run -it janusgraph/janusgraph /bin/bash
    root@8542ed1b8232:/opt/janusgraph# ls data
    grateful-dead-janusgraph-schema.groovy	tinkerpop-crew-typed.json
    grateful-dead-typed.json		tinkerpop-crew-v2d0-typed.json
    grateful-dead-v2d0-typed.json		tinkerpop-crew-v2d0.json
    grateful-dead-v2d0.json			tinkerpop-crew.json
    grateful-dead.json			tinkerpop-crew.kryo
    grateful-dead.kryo			tinkerpop-modern-typed.json
    grateful-dead.txt			tinkerpop-modern-v2d0-typed.json
    grateful-dead.xml			tinkerpop-modern-v2d0.json
    script-input-grateful-dead.groovy	tinkerpop-modern.json
    script-input-tinkerpop.groovy		tinkerpop-modern.kryo
    tinkerpop-classic-typed.json		tinkerpop-modern.xml
    tinkerpop-classic-v2d0-typed.json	tinkerpop-sink-typed.json
    tinkerpop-classic-v2d0.json		tinkerpop-sink-v2d0-typed.json
    tinkerpop-classic.json			tinkerpop-sink-v2d0.json
    tinkerpop-classic.kryo			tinkerpop-sink.json
    tinkerpop-classic.txt			tinkerpop-sink.kryo
    tinkerpop-classic.xml

For a test I chose tinkerpop-modern.xml:

        # one of the sample files listed in the container's data directory
        modern_path = "data/tinkerpop-modern.xml"
        loader = g.io(modern_path).read()
        loader.iterate()
        vertex_total = g.V().count().next()
        print("%s has %d vertices" % (modern_path, vertex_total))
        # the check expects six vertices after loading
        assert vertex_total == 6

which works. Thanks to Kelvin Lawrence for his comment on stackoverflow!

To make "external" data available to the docker image the --mount option can be used:

    docker run -it -p 8182:8182 --mount src=<path to graphdata>,target=/graphdata,type=bind janusgraph/janusgraph

The following helper class helps sharing files:

RemoteGremlin[edit]

see also Pyjanusgraph

    '''
    Created on 2020-03-30
    
    @author: wf
    '''
    from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
    from gremlin_python.structure.graph import Graph
    from shutil import copyfile
    import os
    
    class RemoteGremlin(object):
        '''
        helper for remote gremlin connections

        Holds the server address, an optional pair of paths used to hand
        files over to the server (see sharepoint/share) and, once open()
        has been called, the remote connection and traversal source.
        '''

        def __init__(self, server, port=8182):
            '''
            construct me with the given server and port

            Args:
                server: host name or address of the gremlin server
                port: port of the gremlin server (default: 8182)
            '''
            self.server=server
            self.port=port
            # the local share directory is kept under a private name so that
            # it does not replace the sharepoint() method on this instance
            self._sharepoint=None
            self.sharepath=None
            # set by open(); None means there is nothing to close
            self.connection=None

        def sharepoint(self,sharepoint,sharepath):
            '''
            set up the sharepoint

            Both values are used as plain string prefixes by share(), so
            each must end with its path separator.

            Args:
                sharepoint: local directory prefix that files are copied to
                sharepath: prefix of the same directory as seen by the server
            '''
            self._sharepoint=sharepoint
            self.sharepath=sharepath

        def share(self,file):
            '''
            share the given file  and return the path as seen by the server

            Args:
                file: path of the local file to copy into the sharepoint

            Returns:
                sharepath followed by the base name of the file

            Raises:
                ValueError: if sharepoint() has not been called yet
            '''
            if self._sharepoint is None or self.sharepath is None:
                raise ValueError("sharepoint must be set up before sharing %s" % (file))
            fbase=os.path.basename(file)
            copyfile(file,self._sharepoint+fbase)
            return self.sharepath+fbase

        def open(self):
            '''
            open the remote connection

            Sets graph, url, connection and the traversal source g.
            '''
            self.graph = Graph()
            self.url='ws://%s:%s/gremlin' % (self.server,self.port)
            self.connection = DriverRemoteConnection(self.url, 'g')
            # The connection should be closed on shut down to close open connections with connection.close()
            self.g = self.graph.traversal().withRemote(self.connection)

        def close(self):
            '''
            close the remote connection

            Does nothing when no connection is open.
            '''
            if self.connection is not None:
                self.connection.close()
                self.connection=None

python unit test[edit]

    '''
    Created on 2020-03-28
    
    @author: wf
    '''
    import unittest
    from tp.gremlin import RemoteGremlin
    
    class JanusGraphTest(unittest.TestCase):
        '''
        test access to a janus graph docker instance via the RemoteGremlin helper class
        '''

        def setUp(self):
            pass


        def tearDown(self):
            pass

        def test_loadGraph(self):
            '''
            share the air routes sample with the server, load it and check
            that the graph then holds 47 vertices
            '''
            # change to your server
            rg=RemoteGremlin("capri.bitplan.com")
            rg.open()
            # close the connection even when a later step or assertion fails
            self.addCleanup(rg.close)
            # change to your shared path
            rg.sharepoint("/Volumes/bitplan/user/wf/graphdata/","/graphdata/")
            g=rg.g
            graphmlFile="air-routes-small.xml"
            shared=rg.share(graphmlFile)
            # drop the existing content of the graph
            g.V().drop().iterate()
            # read the content from the air routes example
            g.io(shared).read().iterate()
            vCount=g.V().count().next()
            print ("%s has %d vertices" % (shared,vCount))
            # assertEqual is not stripped under -O and reports both values
            self.assertEqual(47,vCount)
    
    
    if __name__ == "__main__":
        #import sys;sys.argv = ['', 'Test.testName']
        unittest.main()

2. Trial[edit]

docker run --rm --name janusgraph-default janusgraph/janusgraph:latest
waiting for storage ...
waiting for storage ...
waiting for storage ...
waiting for storage ...
...
GraphFactory message: GraphFactory could not instantiate this Graph implementation [class org.janusgraph.core.JanusGraphFactory]
java.lang.RuntimeException: GraphFactory could not instantiate this Graph implementation [class org.janusgraph.core.JanusGraphFactory]
	at org.apache.tinkerpop.gremlin.structure.util.GraphFactory.open(GraphFactory.java:82)
...
Caused by: javax.script.ScriptException: javax.script.ScriptException: groovy.lang.MissingPropertyException: No such property: graph for class: Script1
	at org.apache.tinkerpop.gremlin.groovy.jsr223.GremlinGroovyScriptEngine.eval(GremlinGroovyScriptEngine.java:378)
	at javax.script.AbstractScriptEngine.eval(AbstractScriptEngine.java:264)
	at org.apache.tinkerpop.gremlin.jsr223.DefaultGremlinScriptEngineManager.lambda$createGremlinScriptEngine$16(DefaultGremlinScriptEngineManager.java:460)
	... 24 more
4438 [gremlin-server-boss-1] INFO  org.apache.tinkerpop.gremlin.server.GremlinServer  - Channel started at port 8182.

When trying to connect with python via

def testJanusGraph(self):
    '''
    connect to a local gremlin server and print the age of the vertex named hercules
    '''
    graph = Graph()
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    try:
        g = graph.traversal().withRemote(connection)
        # Reuse 'g' across the application
        herculesAge = g.V().has('name', 'hercules').values('age').next()
        print('Hercules is {} years old.'.format(herculesAge))
    finally:
        # always release the connection, also when the query fails
        connection.close()

the result is

 File "/opt/local/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tornado/concurrent.py", line 238, in result
    raise_exc_info(self._exc_info)
  File "<string>", line 4, in raise_exc_info
ConnectionRefusedError: [Errno 61] Connection refused

1. Trial[edit]

  1. Downloaded the 275 MByte janusgraph-0.4.0-hadoop2.zip, unzipped it and started bin/gremlin-server.sh (which already gave several error messages)
  2. followed getting started procedure above
  3. started bin/gremlin.sh
graph = JanusGraphFactory.open('conf/janusgraph-berkeleyje-es.properties')
17:41:38 WARN  org.janusgraph.diskstorage.es.rest.RestElasticSearchClient  - Unable to determine Elasticsearch server version. Default to FIVE.
java.net.ConnectException: Connection refused
1
Gremlin python Wolfgang Fahl
🖨 🚪