import datetime
import random
from py2neo import neo4j
# Handle to the local Neo4j REST endpoint; every query in this file runs
# against it.
graph = neo4j.GraphDatabaseService("http://localhost:7474/db/data")

# Ask Neo4j to enforce uniqueness of User.id before any benchmark runs.
# The statement previously carried a stray "." after "(u:User)", which is not
# valid Cypher, and the bare `except: pass` hid the resulting error, so the
# constraint was silently never created.
try:
    neo4j.CypherQuery(graph,"""
CREATE CONSTRAINT ON (u:User)
ASSERT u.id IS UNIQUE
""").execute()
except Exception as error:
    # Best effort: keep going (e.g. the server may reject a repeated
    # creation -- TODO confirm), but say so instead of hiding the failure.
    print("warning: could not create unique constraint on User.id: %s" % (error,))
def delete_everything():
    """Delete every relationship, then every node, from the graph.

    Two separate queries are issued: relationships first, nodes second.
    """
    # Directed pattern: the undirected form `()-[r]-()` matches each
    # relationship from both of its ends, so every relationship was being
    # matched (and deleted) twice.
    neo4j.CypherQuery(graph, "MATCH ()-[r]->() DELETE r").run()
    neo4j.CypherQuery(graph, "MATCH (n) DELETE n").run()
def count_relationships():
    """Return the number of relationships currently stored in the graph."""
    # Directed pattern: an undirected `()-[r]-()` matches every relationship
    # once per direction, which reported about twice the real count.
    query = neo4j.CypherQuery(graph, "MATCH ()-[r]->() RETURN COUNT(r)")
    return query.execute_one()
def count_nodes():
    """Return the number of nodes currently stored in the graph."""
    node_query = neo4j.CypherQuery(graph, "MATCH (n) RETURN COUNT(n)")
    total = node_query.execute_one()
    return total
def rand_id(mean=5000.0):
    """Return a random non-negative integer id.

    The id is a sample from an exponential distribution with the given
    mean, truncated to an integer. Small ids are more likely than large
    ones: about 63% (1 - 1/e) of the results fall below ``mean``, and the
    median is ``mean * ln(2)`` (roughly 3466 for the default). Repeated
    calls are therefore expected to return colliding ids.

    mean -- mean of the exponential distribution; must be positive.
            Defaults to 5000, the value that used to be hard-coded.
    """
    return int(random.expovariate(1.0 / mean))
def try_algo(insert_friends_algo, iterations=100, num_friends=200):
    """Benchmark one friend-insertion algorithm against the graph.

    For each iteration a random user id and ``num_friends`` random friend
    ids are generated with rand_id() and handed to ``insert_friends_algo``.
    Afterwards the elapsed time, the relationship and node counts, and the
    per-second rates are printed.

    insert_friends_algo -- callable taking (id, ids) that performs the insert.
    iterations          -- number of users to insert (default 100).
    num_friends         -- friend ids generated per user (default 200).

    All relationships and nodes are removed when the run ends, including
    when the algorithm raises; in that case the exception propagates and
    nothing is printed.
    """
    start_time = datetime.datetime.now()
    try:
        for _ in range(iterations):
            user_id = rand_id()
            friend_ids = [rand_id() for _ in range(num_friends)]
            insert_friends_algo(user_id, friend_ids)
        time_required = (datetime.datetime.now() - start_time).total_seconds()
        # Counts must be read before the cleanup below wipes the graph.
        rel_count = count_relationships()
        node_count = count_nodes()
    finally:
        # Always leave an empty graph behind so the next run starts clean.
        delete_everything()
    # A zero elapsed time would make the rate division raise.
    if time_required > 0:
        rel_rate = rel_count / time_required
        node_rate = node_count / time_required
    else:
        rel_rate = node_rate = float("inf")
    # Single parenthesised argument: valid as a Python 2 print statement and
    # as a Python 3 print() call.
    print("time:%fs;\trelationship count:%d;\tnode count%d; rel/sec:%f;\tnode/sec:%f" % (time_required, rel_count, node_count, rel_rate, node_rate))
def insert_friends1(id, ids):
    """Insert a user and all of their friends with a single query.

    This is the original approach: one Cypher statement merges the user,
    stamps friends_found_at, and then, for every friend id, merges the
    friend node and creates the FOLLOWS relationship if it is missing.

    id  -- id of the user whose friends are being stored.
    ids -- ids of the users that ``id`` follows.
    """
    single_query = neo4j.CypherQuery(graph,"""
MERGE (a:User {id:{id}})
ON CREATE SET
a.created_at = timestamp()
SET a.friends_found_at = timestamp()
FOREACH (id IN {ids} |
MERGE (b:User {id:id})
ON CREATE SET
b.created_at = timestamp()
CREATE UNIQUE (a)-[:FOLLOWS]->(b)
)
""")
    params = {"id": id, "ids": ids}
    single_query.execute(**params)
def insert_friends2(id, ids):
    """Create the users one query at a time, then link all friends at once.

    The idea is to separate user creation from relationship creation. The
    first attempt at the linking query used MATCH inside FOREACH, which
    Cypher rejects (MERGE is allowed within a FOREACH, MATCH is not). The
    linking query therefore uses MERGE inside the FOREACH; the friend nodes
    already exist at that point, so MERGE only looks them up.

    id  -- id of the user whose friends are being stored.
    ids -- ids of the users that ``id`` follows; not modified.
    """
    # Build a new list. `all_ids = ids` followed by append() mutated the
    # caller's list and put the user's own id among the friends, which made
    # the user follow themselves.
    all_ids = list(ids) + [id]
    for u in all_ids:
        neo4j.CypherQuery(graph,"""
MERGE (u:User {id:{id}})
ON CREATE SET
u.created_at = timestamp()
""").execute(id=u)
    neo4j.CypherQuery(graph,"""
MATCH (a:User {id:{id}})
SET a.friends_found_at = timestamp()
FOREACH (friend_id IN {ids} |
MERGE (b:User {id:friend_id})
CREATE UNIQUE (a)-[:FOLLOWS]->(b)
)""").execute(id=id,ids=ids)
def insert_friends3(id, ids):
    """Create the users one at a time, then connect them one at a time.

    One MERGE query is issued per user (the friends plus the user
    themselves), followed by one query per friend that stamps
    friends_found_at on the user and creates the FOLLOWS relationship if it
    is missing.

    id  -- id of the user whose friends are being stored.
    ids -- ids of the users that ``id`` follows; not modified.
    """
    # Build a new list. `all_ids = ids` followed by append() mutated the
    # caller's list, so the linking loop below also iterated over the user's
    # own id and created a self-FOLLOWS relationship.
    all_ids = list(ids) + [id]
    for u in all_ids:
        neo4j.CypherQuery(graph,"""
MERGE (u:User {id:{id}})
ON CREATE SET
u.created_at = timestamp()
""").execute(id=u)
    for friend_id in ids:
        neo4j.CypherQuery(graph,"""
MATCH (a:User {id:{id}}),(b:User {id:{friend_id}})
SET a.friends_found_at = timestamp()
CREATE UNIQUE (a)-[:FOLLOWS]->(b)
""").execute(id=id,friend_id=friend_id)
def insert_friends4(id, ids):
    """Create the users one at a time, then connect them all in one query.

    One MERGE query is issued per user (the friends plus the user
    themselves). A single query then matches the user together with every
    User whose id is in ``ids``, stamps friends_found_at, and creates the
    missing FOLLOWS relationships.

    id  -- id of the user whose friends are being stored.
    ids -- ids of the users that ``id`` follows; not modified.
    """
    # Build a new list. `all_ids = ids` followed by append() mutated the
    # caller's list, so the `{ids}` parameter below also contained the user's
    # own id and the user ended up following themselves.
    all_ids = list(ids) + [id]
    for u in all_ids:
        neo4j.CypherQuery(graph,"""
MERGE (u:User {id:{id}})
ON CREATE SET
u.created_at = timestamp()""").execute(id=u)
    neo4j.CypherQuery(graph,"""
MATCH (a:User {id:{id}}),(b:User)
WHERE b.id in {ids}
SET a.friends_found_at = timestamp()
CREATE UNIQUE (a)-[:FOLLOWS]->(b)""").execute(id=id,ids=ids)
def insert_friends5(id, ids):
    """Create all users in one query, then connect them all in one query.

    The first attempt at the creation query put a WHERE clause on MERGE and
    referenced ``ids`` without parameter braces, which Cypher rejects. It
    was also executed once per id even though each execution already
    received the full id list. The creation step is now a single
    FOREACH + MERGE over the id list.
    NOTE(review): assumes the server accepts a query that starts with
    FOREACH -- confirm against the Neo4j version in use.

    id  -- id of the user whose friends are being stored.
    ids -- ids of the users that ``id`` follows; not modified.
    """
    # Build a new list. `all_ids = ids` followed by append() mutated the
    # caller's list, so the `{ids}` parameter of the linking query also
    # contained the user's own id and the user ended up following themselves.
    all_ids = list(ids) + [id]
    neo4j.CypherQuery(graph,"""
FOREACH (user_id IN {ids} |
MERGE (u:User {id:user_id})
ON CREATE SET
u.created_at = timestamp()
)""").execute(ids=all_ids)
    neo4j.CypherQuery(graph,"""
MATCH (a:User {id:{id}}),(b:User)
WHERE b.id in {ids}
SET a.friends_found_at = timestamp()
CREATE UNIQUE (a)-[:FOLLOWS]->(b)""").execute(id=id,ids=ids)
# Benchmark each selected variant in turn; insert_friends2 and
# insert_friends5 are not part of this run.
for algo in (insert_friends1, insert_friends3, insert_friends4):
    try_algo(algo)
# Output recorded from one run (one line per try_algo call; presumably in the
# order insert_friends1, insert_friends3, insert_friends4):
# time:248.569471s; relationship count:51064; node count11155; rel/sec:205.431503; node/sec:44.876790
# time:4184.942574s; relationship count:39701; node count9917; rel/sec:9.486630; node/sec:2.369686
# time:25518.941203s; relationship count:39639; node count9803; rel/sec:1.553317; node/sec:0.384146