"""Write two sample Titanic passenger records to an Avro container file.

The schema is read from ``schemas/titanic.avsc`` and the records are written
to ``avroFiles/titanic.avro``. The rest of this docstring, and the string
literals further down, are reference notes only; they are never executed.

Sample CSV rows (columns appear in the same order as the schema fields)::

    1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
    2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C

The second row expressed as a JSON record::

    {
      "Fare": 71.2833,
      "Name": "Cumings, Mrs. John Bradley (Florence Briggs Thayer)",
      "Embarked": "C",
      "Age": 38,
      "Parch": 0,
      "Pclass": 1,
      "Sex": "female",
      "Survived": 1,
      "SibSp": 1,
      "PassengerId": 2,
      "Ticket": "PC 17599",
      "Cabin": "C85"
    }
"""

# NOTE(review): io, random, BinaryEncoder and DataFileReader are not used by
# the code below; they are kept because other code may rely on them.
import io
import random

import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumWriter, BinaryEncoder

# AVRO SCHEMA
# Reference copy of the Titanic schema (presumably what schemas/titanic.avsc
# contains -- verify against that file). The "Embarked" field was malformed in
# the original note (no "type" key, unnamed enum); Avro enums require a name,
# so "Port" below is a placeholder to be aligned with the real .avsc file.
'''
{
  "namespace": "avro.example.titanic",
  "type": "record",
  "name": "demo",
  "doc": "Titanic data set",
  "fields": [
    {"name": "PassengerId", "type": "int", "doc": "Passenger ID"},
    {"name": "Survived", "type": "int", "doc": "Survival (0 = No; 1 = Yes)"},
    {"name": "Pclass", "type": "int", "doc": "Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)"},
    {"name": "Name", "type": "string", "doc": "Name"},
    {"name": "Sex", "type": "string", "doc": "Sex"},
    {"name": "Age", "type": "int", "doc": "Age"},
    {"name": "SibSp", "type": "int", "doc": "Number of Siblings/Spouses Aboard"},
    {"name": "Parch", "type": "int", "doc": "Number of Parents/Children Aboard"},
    {"name": "Ticket", "type": "string", "doc": "Ticket Number"},
    {"name": "Fare", "type": "float", "doc": "Passenger Fare"},
    {"name": "Cabin", "type": "string", "doc": "Cabin"},
    {"name": "Embarked",
     "type": {"type": "enum", "name": "Port", "symbols": ["C", "S", "Q"]},
     "doc": "Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)"}
  ]
}
'''

# Path to the titanic.avsc Avro schema.
schema_path = "schemas/titanic.avsc"
with open(schema_path) as schema_file:
    schema = avro.schema.parse(schema_file.read())

# Records to write; keys match the schema's field names.
passengers = [
    {
        "Fare": 71.2833,
        "Name": "Cumings, Mrs. John Bradley (Florence Briggs Thayer)",
        "Embarked": "C",
        "Age": 38,
        "Parch": 0,
        "Pclass": 1,
        "Sex": "female",
        "Survived": 1,
        "SibSp": 1,
        "PassengerId": 2,
        "Ticket": "PC 17599",
        "Cabin": "C85",
    },
    {
        "Fare": 7.925,
        "Name": "Heikkinen, Miss. Laina",
        "Embarked": "S",
        "Age": 26,
        "Parch": 0,
        "Pclass": 3,
        "Sex": "female",
        "Survived": 1,
        "SibSp": 0,
        "PassengerId": 3,
        "Ticket": "STON/O2. 3101282",
        "Cabin": "",
    },
]

# Avro container files are binary, so the output must be opened with "wb";
# text mode fails on Python 3 and can corrupt the bytes on some platforms.
with open("avroFiles/titanic.avro", "wb") as avro_file:
    writer = DataFileWriter(avro_file, DatumWriter(), schema, codec="null")
    try:
        for passenger in passengers:
            writer.append(passenger)
    finally:
        # close() flushes the pending block; run it even if an append fails.
        writer.close()

# Schema Version1
# Reference only. Compared with Version2 below: Version2 renames "age" to
# "yrs" (declaring "age" as an alias), adds "gender" with a default value,
# and drops the "boss" field.
'''
{
  "type": "record",
  "name": "Employee",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"},
    {"name": "emails", "type": {"type": "array", "items": "string"}},
    {"name": "boss", "type": ["Employee", "null"]}
  ]
}
'''

# Schema Version2
'''
{
  "type": "record",
  "name": "Employee",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "yrs", "type": "int", "aliases": ["age"]},
    {"name": "gender", "type": "string", "default": "unknown"},
    {"name": "emails", "type": {"type": "array", "items": "string"}}
  ]
}
'''