# Just an idea
Valid JSON, only line by line
{"KEY3": "qux0", "KEY2": "baz0", "KEY1": "bar0", "KEY0": "foo0", "POS": 5, "CHROM": "chr3"}
{"KEY3": "qux1", "KEY2": "baz1", "KEY1": "bar1", "KEY0": "foo1", "POS": 6, "CHROM": "chr12"}
{"KEY3": "qux2", "KEY2": "baz2", "KEY1": "bar2", "KEY0": "foo2", "POS": 9, "CHROM": "chr1"}
{"KEY3": "qux3", "KEY2": "baz3", "KEY1": "bar3", "KEY0": "foo3", "POS": 8, "CHROM": "chr12"}
{"KEY3": "qux4", "KEY2": "baz4", "KEY1": "bar4", "KEY0": "foo4", "POS": 3, "CHROM": "chrX"}
{"KEY3": "qux5", "KEY2": "baz5", "KEY1": "bar5", "KEY0": "foo5", "POS": 2, "CHROM": "chrX"}
{"KEY3": "qux6", "KEY2": "baz6", "KEY1": "bar6", "KEY0": "foo6", "POS": 10, "CHROM": "chr3"}
{"KEY3": "qux7", "KEY2": "baz7", "KEY1": "bar7", "KEY0": "foo7", "POS": 9, "CHROM": "chr1"}
{"KEY3": "qux8", "KEY2": "baz8", "KEY1": "bar8", "KEY0": "foo8", "POS": 0, "CHROM": "chr12"}
{"KEY3": "qux9", "KEY2": "baz9", "KEY1": "bar9", "KEY0": "foo9", "POS": 3, "CHROM": "chr3"}
{"KEY3": "qux10", "KEY2": "baz10", "KEY1": "bar10", "KEY0": "foo10", "POS": 8, "CHROM": "chr12"}
{"KEY3": "qux11", "KEY2": "baz11", "KEY1": "bar11", "KEY0": "foo11", "POS": 7, "CHROM": "chrX"}
{"KEY3": "qux12", "KEY2": "baz12", "KEY1": "bar12", "KEY0": "foo12", "POS": 6, "CHROM": "chrX"}
{"KEY3": "qux13", "KEY2": "baz13", "KEY1": "bar13", "KEY0": "foo13", "POS": 10, "CHROM": "chr12"}
{"KEY3": "qux14", "KEY2": "baz14", "KEY1": "bar14", "KEY0": "foo14", "POS": 3, "CHROM": "chrX"}
{"KEY3": "qux15", "KEY2": "baz15", "KEY1": "bar15", "KEY0": "foo15", "POS": 10, "CHROM": "chr3"}
{"KEY3": "qux16", "KEY2": "baz16", "KEY1": "bar16", "KEY0": "foo16", "POS": 2, "CHROM": "chr12"}
{"KEY3": "qux17", "KEY2": "baz17", "KEY1": "bar17", "KEY0": "foo17", "POS": 0, "CHROM": "chr1"}
{"KEY3": "qux18", "KEY2": "baz18", "KEY1": "bar18", "KEY0": "foo18", "POS": 6, "CHROM": "chr3"}
{"KEY3": "qux19", "KEY2": "baz19", "KEY1": "bar19", "KEY0": "foo19", "POS": 0, "CHROM": "chr3"}
Everyline gets a comma, except the last one and it becomes a list
[{"KEY3": "qux0", "KEY2": "baz0", "KEY1": "bar0", "KEY0": "foo0", "POS": 5, "CHROM": "chr3"},
{"KEY3": "qux1", "KEY2": "baz1", "KEY1": "bar1", "KEY0": "foo1", "POS": 6, "CHROM": "chr12"},
{"KEY3": "qux2", "KEY2": "baz2", "KEY1": "bar2", "KEY0": "foo2", "POS": 9, "CHROM": "chr1"},
{"KEY3": "qux3", "KEY2": "baz3", "KEY1": "bar3", "KEY0": "foo3", "POS": 8, "CHROM": "chr12"},
{"KEY3": "qux4", "KEY2": "baz4", "KEY1": "bar4", "KEY0": "foo4", "POS": 3, "CHROM": "chrX"},
{"KEY3": "qux5", "KEY2": "baz5", "KEY1": "bar5", "KEY0": "foo5", "POS": 2, "CHROM": "chrX"},
{"KEY3": "qux6", "KEY2": "baz6", "KEY1": "bar6", "KEY0": "foo6", "POS": 10, "CHROM": "chr3"},
{"KEY3": "qux7", "KEY2": "baz7", "KEY1": "bar7", "KEY0": "foo7", "POS": 9, "CHROM": "chr1"},
{"KEY3": "qux8", "KEY2": "baz8", "KEY1": "bar8", "KEY0": "foo8", "POS": 0, "CHROM": "chr12"},
{"KEY3": "qux9", "KEY2": "baz9", "KEY1": "bar9", "KEY0": "foo9", "POS": 3, "CHROM": "chr3"},
{"KEY3": "qux10", "KEY2": "baz10", "KEY1": "bar10", "KEY0": "foo10", "POS": 8, "CHROM": "chr12"},
{"KEY3": "qux11", "KEY2": "baz11", "KEY1": "bar11", "KEY0": "foo11", "POS": 7, "CHROM": "chrX"},
{"KEY3": "qux12", "KEY2": "baz12", "KEY1": "bar12", "KEY0": "foo12", "POS": 6, "CHROM": "chrX"},
{"KEY3": "qux13", "KEY2": "baz13", "KEY1": "bar13", "KEY0": "foo13", "POS": 10, "CHROM": "chr12"},
{"KEY3": "qux14", "KEY2": "baz14", "KEY1": "bar14", "KEY0": "foo14", "POS": 3, "CHROM": "chrX"},
{"KEY3": "qux15", "KEY2": "baz15", "KEY1": "bar15", "KEY0": "foo15", "POS": 10, "CHROM": "chr3"},
{"KEY3": "qux16", "KEY2": "baz16", "KEY1": "bar16", "KEY0": "foo16", "POS": 2, "CHROM": "chr12"},
{"KEY3": "qux17", "KEY2": "baz17", "KEY1": "bar17", "KEY0": "foo17", "POS": 0, "CHROM": "chr1"},
{"KEY3": "qux18", "KEY2": "baz18", "KEY1": "bar18", "KEY0": "foo18", "POS": 6, "CHROM": "chr3"},
{"KEY3": "qux19", "KEY2": "baz19", "KEY1": "bar19", "KEY0": "foo19", "POS": 0, "CHROM": "chr3"}]
# Now we can put it in a pandas dataframe
converted_json = """[{"KEY3": "qux0", "KEY2": "baz0", "KEY1": "bar0", "KEY0": "foo0", "POS": 5, "CHROM": "chr3"},
{"KEY3": "qux1", "KEY2": "baz1", "KEY1": "bar1", "KEY0": "foo1", "POS": 6, "CHROM": "chr12"},
{"KEY3": "qux2", "KEY2": "baz2", "KEY1": "bar2", "KEY0": "foo2", "POS": 9, "CHROM": "chr1"},
{"KEY3": "qux3", "KEY2": "baz3", "KEY1": "bar3", "KEY0": "foo3", "POS": 8, "CHROM": "chr12"},
{"KEY3": "qux4", "KEY2": "baz4", "KEY1": "bar4", "KEY0": "foo4", "POS": 3, "CHROM": "chrX"},
{"KEY3": "qux5", "KEY2": "baz5", "KEY1": "bar5", "KEY0": "foo5", "POS": 2, "CHROM": "chrX"},
{"KEY3": "qux6", "KEY2": "baz6", "KEY1": "bar6", "KEY0": "foo6", "POS": 10, "CHROM": "chr3"},
{"KEY3": "qux7", "KEY2": "baz7", "KEY1": "bar7", "KEY0": "foo7", "POS": 9, "CHROM": "chr1"},
{"KEY3": "qux8", "KEY2": "baz8", "KEY1": "bar8", "KEY0": "foo8", "POS": 0, "CHROM": "chr12"},
{"KEY3": "qux9", "KEY2": "baz9", "KEY1": "bar9", "KEY0": "foo9", "POS": 3, "CHROM": "chr3"},
{"KEY3": "qux10", "KEY2": "baz10", "KEY1": "bar10", "KEY0": "foo10", "POS": 8, "CHROM": "chr12"},
{"KEY3": "qux11", "KEY2": "baz11", "KEY1": "bar11", "KEY0": "foo11", "POS": 7, "CHROM": "chrX"},
{"KEY3": "qux12", "KEY2": "baz12", "KEY1": "bar12", "KEY0": "foo12", "POS": 6, "CHROM": "chrX"},
{"KEY3": "qux13", "KEY2": "baz13", "KEY1": "bar13", "KEY0": "foo13", "POS": 10, "CHROM": "chr12"},
{"KEY3": "qux14", "KEY2": "baz14", "KEY1": "bar14", "KEY0": "foo14", "POS": 3, "CHROM": "chrX"},
{"KEY3": "qux15", "KEY2": "baz15", "KEY1": "bar15", "KEY0": "foo15", "POS": 10, "CHROM": "chr3"},
{"KEY3": "qux16", "KEY2": "baz16", "KEY1": "bar16", "KEY0": "foo16", "POS": 2, "CHROM": "chr12"},
{"KEY3": "qux17", "KEY2": "baz17", "KEY1": "bar17", "KEY0": "foo17", "POS": 0, "CHROM": "chr1"},
{"KEY3": "qux18", "KEY2": "baz18", "KEY1": "bar18", "KEY0": "foo18", "POS": 6, "CHROM": "chr3"},
{"KEY3": "qux19", "KEY2": "baz19", "KEY1": "bar19", "KEY0": "foo19", "POS": 0, "CHROM": "chr3"}]"""
# faking file, so everything stays in the notebook
import cStringIO
json_input = cStringIO.StringIO()
json_input.write(converted_json)
import pandas as pd
df = pd.read_json(json_input.getvalue())
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 20 entries, 0 to 19 Data columns (total 6 columns): CHROM 20 non-null object KEY0 20 non-null object KEY1 20 non-null object KEY2 20 non-null object KEY3 20 non-null object POS 20 non-null int64 dtypes: int64(1), object(5)
# sort by KEY1
df.sort(columns="KEY1")
CHROM | KEY0 | KEY1 | KEY2 | KEY3 | POS | |
---|---|---|---|---|---|---|
0 | chr3 | foo0 | bar0 | baz0 | qux0 | 5 |
1 | chr12 | foo1 | bar1 | baz1 | qux1 | 6 |
10 | chr12 | foo10 | bar10 | baz10 | qux10 | 8 |
11 | chrX | foo11 | bar11 | baz11 | qux11 | 7 |
12 | chrX | foo12 | bar12 | baz12 | qux12 | 6 |
13 | chr12 | foo13 | bar13 | baz13 | qux13 | 10 |
14 | chrX | foo14 | bar14 | baz14 | qux14 | 3 |
15 | chr3 | foo15 | bar15 | baz15 | qux15 | 10 |
16 | chr12 | foo16 | bar16 | baz16 | qux16 | 2 |
17 | chr1 | foo17 | bar17 | baz17 | qux17 | 0 |
18 | chr3 | foo18 | bar18 | baz18 | qux18 | 6 |
19 | chr3 | foo19 | bar19 | baz19 | qux19 | 0 |
2 | chr1 | foo2 | bar2 | baz2 | qux2 | 9 |
3 | chr12 | foo3 | bar3 | baz3 | qux3 | 8 |
4 | chrX | foo4 | bar4 | baz4 | qux4 | 3 |
5 | chrX | foo5 | bar5 | baz5 | qux5 | 2 |
6 | chr3 | foo6 | bar6 | baz6 | qux6 | 10 |
7 | chr1 | foo7 | bar7 | baz7 | qux7 | 9 |
8 | chr12 | foo8 | bar8 | baz8 | qux8 | 0 |
9 | chr3 | foo9 | bar9 | baz9 | qux9 | 3 |
# view only chr1
df[df.CHROM=="chr1"]
CHROM | KEY0 | KEY1 | KEY2 | KEY3 | POS | |
---|---|---|---|---|---|---|
2 | chr1 | foo2 | bar2 | baz2 | qux2 | 9 |
7 | chr1 | foo7 | bar7 | baz7 | qux7 | 9 |
17 | chr1 | foo17 | bar17 | baz17 | qux17 | 0 |