"""Benchmark Cypher strategies for inserting users and FOLLOWS relationships.

Each ``insert_friendsN`` function is one strategy for storing a user and the
users they follow, using py2neo's ``CypherQuery``.  ``try_algo`` times one
strategy against random ids, prints the throughput and empties the database.

WARNING: running this module connects to the Neo4j server at
http://localhost:7474 and deletes every node and relationship in it.
"""
import datetime
import random
import sys

from py2neo import neo4j

graph = neo4j.GraphDatabaseService("http://localhost:7474/db/data")

# Best effort: the benchmark still runs if the constraint cannot be created,
# but the failure is reported instead of being silently swallowed (a silent
# handler is what previously hid a syntax error in this very query).
try:
    neo4j.CypherQuery(graph, """
        CREATE CONSTRAINT ON (u:User)
        ASSERT u.id IS UNIQUE
    """).execute()
except Exception as exc:
    sys.stderr.write(
        "warning: could not create the unique constraint on User.id: %s\n"
        % (exc,)
    )


def delete_everything():
    """Delete every relationship and then every node in the database."""
    # Relationships are removed first, then the nodes they were attached to.
    neo4j.CypherQuery(graph, "MATCH ()-[r]->() DELETE r").run()
    neo4j.CypherQuery(graph, "MATCH (n) DELETE n").run()


def count_relationships():
    """Return the number of relationships in the database.

    The pattern is directed on purpose: an undirected ``()-[r]-()`` pattern
    matches each relationship once per direction, which doubles the count.
    """
    return neo4j.CypherQuery(
        graph, "MATCH ()-[r]->() RETURN COUNT(r)"
    ).execute_one()


def count_nodes():
    """Return the number of nodes in the database."""
    return neo4j.CypherQuery(graph, "MATCH (n) RETURN COUNT(n)").execute_one()


def rand_id():
    """Return a random non-negative integer id.

    Ids are drawn from an exponential distribution with mean 5000 and then
    truncated to an int.  Small ids are therefore the most common: roughly
    63% of the draws are below 5000 and the median is about 3466, so
    repeated calls produce a realistic amount of id collisions.
    """
    return int(random.expovariate(1.0 / 5000))


def try_algo(insert_friends_algo, iterations=100, num_friends=200):
    """Time an insertion strategy against random data and print the result.

    For each iteration a random user id and ``num_friends`` random friend ids
    are generated and handed to ``insert_friends_algo(id, ids)``.  Afterwards
    the relationships and nodes are counted and a summary line is printed.

    The database is emptied when the run ends, even if the strategy raises,
    so a failed run cannot pollute the next measurement.  In that case the
    exception propagates and no summary is printed.

    Args:
        insert_friends_algo: callable taking ``(id, ids)``.
        iterations: number of users to insert (default 100).
        num_friends: number of friend ids generated per user (default 200).
    """
    start_time = datetime.datetime.now()
    try:
        for _iteration in range(iterations):
            user_id = rand_id()
            friend_ids = [rand_id() for _friend in range(num_friends)]
            insert_friends_algo(user_id, friend_ids)
        elapsed = datetime.datetime.now() - start_time
        time_required = elapsed.total_seconds()
        rel_count = count_relationships()
        node_count = count_nodes()
    finally:
        delete_everything()

    # Guard against a zero elapsed time (e.g. iterations=0 on a coarse clock).
    rel_rate = rel_count / time_required if time_required else 0.0
    node_rate = node_count / time_required if time_required else 0.0
    # A single pre-formatted argument keeps this line valid on Python 2 and 3.
    print(
        "time:%fs;\trelationship count:%d;\tnode count:%d; rel/sec:%f;\tnode/sec:%f"
        % (time_required, rel_count, node_count, rel_rate, node_rate)
    )


def insert_friends1(id, ids):
    """Insert the user, the friends and the relationships in one query.

    This was the original query: the user is merged, then a FOREACH merges
    each friend and links it to the user.
    """
    neo4j.CypherQuery(graph, """
        MERGE (a:User {id:{id}})
        ON CREATE SET a.created_at = timestamp()
        SET a.friends_found_at = timestamp()
        FOREACH (id IN {ids} |
            MERGE (b:User {id:id})
            ON CREATE SET b.created_at = timestamp()
            CREATE UNIQUE (a)-[:FOLLOWS]->(b)
        )
    """).execute(id=id, ids=ids)


def insert_friends2(id, ids):
    """Create the users first, then connect them inside a FOREACH.

    Kept as a record of an approach that does not work: the idea was to
    separate user creation from relationship creation, but according to the
    original author's note MERGE is allowed within a FOREACH while MATCH is
    not, so the second query is expected to be rejected by the server.
    """
    # Build a new list: appending to ``ids`` itself would mutate the caller's
    # list and make the user one of their own friends.
    all_ids = ids + [id]
    for u in all_ids:
        neo4j.CypherQuery(graph, """
            MERGE (u:User {id:{id}})
            ON CREATE SET u.created_at = timestamp()
        """).execute(id=u)
    neo4j.CypherQuery(graph, """
        MATCH (a:User {id:{id}})
        SET a.friends_found_at = timestamp()
        FOREACH (id IN {ids} |
            MATCH (b:User {id:id})
            CREATE UNIQUE (a)-[:FOLLOWS]->(b)
        )""").execute(id=id, ids=ids)


def insert_friends3(id, ids):
    """Create the users one query at a time, then connect them one at a time."""
    # New list so ``ids`` keeps holding only the friends (no self-follow).
    all_ids = ids + [id]
    for u in all_ids:
        neo4j.CypherQuery(graph, """
            MERGE (u:User {id:{id}})
            ON CREATE SET u.created_at = timestamp()
        """).execute(id=u)
    for friend_id in ids:
        neo4j.CypherQuery(graph, """
            MATCH (a:User {id:{id}}),(b:User {id:{friend_id}})
            SET a.friends_found_at = timestamp()
            CREATE UNIQUE (a)-[:FOLLOWS]->(b)
        """).execute(id=id, friend_id=friend_id)


def insert_friends4(id, ids):
    """Create the users one query at a time, then connect them all at once."""
    # New list so ``ids`` keeps holding only the friends (no self-follow).
    all_ids = ids + [id]
    for u in all_ids:
        neo4j.CypherQuery(graph, """
            MERGE (u:User {id:{id}})
            ON CREATE SET u.created_at = timestamp()""").execute(id=u)
    neo4j.CypherQuery(graph, """
        MATCH (a:User {id:{id}}),(b:User)
        WHERE b.id in {ids}
        SET a.friends_found_at = timestamp()
        CREATE UNIQUE (a)-[:FOLLOWS]->(b)""").execute(id=id, ids=ids)


def insert_friends5(id, ids):
    """Create all users in one query, then connect them all in one query.

    The user-creation step uses FOREACH with MERGE (the construct that
    insert_friends1 relies on), because MERGE cannot be filtered with a
    WHERE clause over a list of ids.
    """
    # New list so ``ids`` keeps holding only the friends (no self-follow).
    all_ids = ids + [id]
    neo4j.CypherQuery(graph, """
        FOREACH (uid IN {ids} |
            MERGE (u:User {id:uid})
            ON CREATE SET u.created_at = timestamp()
        )""").execute(ids=all_ids)
    neo4j.CypherQuery(graph, """
        MATCH (a:User {id:{id}}),(b:User)
        WHERE b.id in {ids}
        SET a.friends_found_at = timestamp()
        CREATE UNIQUE (a)-[:FOLLOWS]->(b)""").execute(id=id, ids=ids)


try_algo(insert_friends1)
try_algo(insert_friends3)
try_algo(insert_friends4)