require 'securerandom'

require 'nyaplot'
require 'bionya'
true
# Load the operon annotation table from data/operon.csv, resolved relative to
# the directory containing this file, into a Nyaplot DataFrame.
path = File.expand_path("../data/operon.csv", __FILE__)
df_operon = Nyaplot::DataFrame.from_csv(path)
genename | geneannotation | operonname | operoncomments | probeset | locbegin | locend | |
---|---|---|---|---|---|---|---|
1 | thrA | aspartokinase I/homoserine dehydrogenase I | thr | threonine biosynthesis | thrA_b0002_st | 337 | 2799 |
2 | thrB | homoserine kinase | thr | threonine biosynthesis | thrB_b0003_st | 2801 | 3733 |
3 | thrC | threonine synthase | thr | threonine biosynthesis | thrC_b0004_st | 3734 | 5020 |
4 | dnaK | DNA biosynthesis; heat shock protein | dnaK | DNA biosynthesis | dnaK_b0014_st | 12163 | 14079 |
5 | dnaJ | DNA biosynthesis; heat shock protein | dnaK | DNA biosynthesis | dnaJ_b0015_st | 14168 | 15298 |
6 | yaaC | 35K hypothetical | ileS-lsp | tRNA synthetase & peptidase | ribF_b0025_st | 21407 | 22348 |
7 | ileS | isoleucine tRNA synthetase (EC 6.1.1.5) | ileS-lsp | tRNA synthetase & peptidase | ileS_b0026_st | 22391 | 25206 |
8 | lspA | prolipoprotein signal peptidase (SPaseII) (EC 3.4 99 35) | ileS-lsp | tRNA synthetase & peptidase | lspA_b0027_st | 25208 | 25701 |
9 | carA | carbamoyl-phosphate synthetase subunit A | carAB | carbamoyl-phosphate synthetase | carA_b0032_st | 29651 | 30799 |
10 | carB | carbamoyl-phosphate synthetase subunit B | carAB | carbamoyl-phosphate synthetase | carB_b0033_st | 30817 | 34038 |
11 | caiT | probable carnitine transporter | cai | probable carnitine operon | caiT_b0040_st | 40417 | 41931 |
12 | caiA | probable carnitine operon oxidoreductase CaiA | cai | probable carnitine operon | caiA_b0039_st | 39244 | 40386 |
13 | caiB | L-carnitine dehydratase | cai | probable carnitine operon | caiB_b0038_st | 37898 | 39115 |
14 | caiC | probable crotonobetaine/carnitine-CoA ligase | cai | probable carnitine operon | caiC_b0037_st | 36271 | 37839 |
15 | caiD | carnitine racemase | cai | probable carnitine operon | caiD_b0036_st | 35393 | 36270 |
16 | caiE | carnitine operon protein CaiE | cai | probable carnitine operon | caiE_b0035_st | 34781 | 35376 |
... | ... | ... | ... | ... | ... | ... | ... |
548 | creD | inner membrane protein CreD | creABCD | phosphate sensor | creD_b4400_st | 4635747 | 4637099 |
# Build `df_operon_info`: a table of contiguous segments with columns
# name / begin / end / size / if_operon.
#
# * Every distinct operon name becomes one "yes" segment spanning the smallest
#   locbegin to the largest locend of its rows.
# * Whenever an operon starts more than one position after the previous
#   segment's end, a "no" filler segment labelled with a random UUID is
#   inserted to cover the gap.
#
# The arrays are seeded with a filler segment 0..336 (size 337).
# NOTE(review): operons are visited in the order their names first appear in
# the CSV; this presumably matches ascending coordinate order -- confirm, since
# an out-of-order operon would produce no gap segment before it.
name_arr      = [SecureRandom.uuid]
begin_arr     = [0]
end_arr       = [336]
size_arr      = [337]
if_operon_arr = ["no"]

# Discard rows whose coordinates are Strings; the min/max and subtraction
# below are applied to these columns.
df_operon.filter! { |row| !(row[:locbegin].is_a?(String) || row[:locend].is_a?(String)) }

df_operon.column(:operonname).to_a.uniq.each do |name|
  df_part      = df_operon.filter { |row| row[:operonname].to_s == name.to_s }
  operon_begin = df_part.column(:locbegin).to_a.min
  operon_end   = df_part.column(:locend).to_a.max
  prev_end     = end_arr.last

  # Insert a filler segment covering prev_end+1 .. operon_begin-1 when there
  # is at least one uncovered position before this operon.
  if operon_begin - prev_end > 1
    name_arr.push(SecureRandom.uuid)
    begin_arr.push(prev_end + 1)
    end_arr.push(operon_begin - 1)
    size_arr.push(operon_begin - prev_end - 1)
    if_operon_arr.push("no")
  end

  name_arr.push(name)
  begin_arr.push(operon_begin)
  end_arr.push(operon_end)
  size_arr.push(operon_end - operon_begin + 1)
  if_operon_arr.push("yes")
end

df_operon_info = Nyaplot::DataFrame.new({
  name: name_arr,
  begin: begin_arr,
  end: end_arr,
  size: size_arr,
  if_operon: if_operon_arr
})
df_operon_info
name | begin | end | size | if_operon |
---|---|---|---|---|
e17eaf65-8992-477d-afb3-68529cab2c82 | 0 | 336 | 337 | no |
thr | 337 | 5020 | 4684 | yes |
2095ab6d-0e82-4d9c-b643-b3f4bdf4cc23 | 5021 | 12162 | 7142 | no |
dnaK | 12163 | 15298 | 3136 | yes |
398c2464-338d-4fbd-b9ee-95b600f592c8 | 15299 | 21406 | 6108 | no |
ileS-lsp | 21407 | 25701 | 4295 | yes |
7d9bb493-9b36-4158-aace-15af8ca30370 | 25702 | 29650 | 3949 | no |
carAB | 29651 | 34038 | 4388 | yes |
5a6f8966-32de-46e1-bf6a-9f81cf85e3d3 | 34039 | 34780 | 742 | no |
cai | 34781 | 41931 | 7151 | yes |
121e64d2-6da7-4284-89cd-1db9d2fb5904 | 41932 | 42366 | 435 | no |
fix | 42367 | 45462 | 3096 | yes |
0c0a594e-1d60-46b1-b49e-e540d3e398f2 | 45463 | 50379 | 4917 | no |
surA-pdxA-ksgA-apaGH | 50380 | 54702 | 4323 | yes |
505b5b25-b3d3-4f10-9eef-c34ebe2122e1 | 54703 | 65854 | 11152 | no |
araBAD | 65855 | 70048 | 4194 | yes |
... | ... | ... | ... | ... |
creABCD | 4633090 | 4637099 | 4010 | yes |
# Attach a nested DataFrame to every segment of `df_operon_info` (new column
# :df). Each nested frame has one row per 1000-position step from the
# segment's begin up to its end, with columns:
#   axis - the step's start position
#   val  - initialised to 0
#   name - initialised to an empty string
#
# Local names are deliberately distinct from the top-level `name_arr`, which a
# block-level assignment would otherwise overwrite.
segment_begins = df_operon_info.column(:begin).to_a
segment_ends   = df_operon_info.column(:end).to_a

df_arr = segment_begins.zip(segment_ends).map do |seg_begin, seg_end|
  axis_values = seg_begin.step(seg_end, 1000).to_a
  Nyaplot::DataFrame.new({
    axis: axis_values,
    val: Array.new(axis_values.length, 0),
    # Block form gives each cell its own String instead of one shared object.
    name: Array.new(axis_values.length) { '' }
  })
end

df_operon_info.df = df_arr
df_operon_info
name | begin | end | size | if_operon | df | |||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
e17eaf65-8992-477d-afb3-68529cab2c82 | 0 | 336 | 337 | no |
| |||||||||||||||||||||||||||||||||||||||
thr | 337 | 5020 | 4684 | yes |
| |||||||||||||||||||||||||||||||||||||||
2095ab6d-0e82-4d9c-b643-b3f4bdf4cc23 | 5021 | 12162 | 7142 | no |
| |||||||||||||||||||||||||||||||||||||||
dnaK | 12163 | 15298 | 3136 | yes |
| |||||||||||||||||||||||||||||||||||||||
398c2464-338d-4fbd-b9ee-95b600f592c8 | 15299 | 21406 | 6108 | no |
| |||||||||||||||||||||||||||||||||||||||
ileS-lsp | 21407 | 25701 | 4295 | yes |
| |||||||||||||||||||||||||||||||||||||||
7d9bb493-9b36-4158-aace-15af8ca30370 | 25702 | 29650 | 3949 | no |
| |||||||||||||||||||||||||||||||||||||||
carAB | 29651 | 34038 | 4388 | yes |
| |||||||||||||||||||||||||||||||||||||||
5a6f8966-32de-46e1-bf6a-9f81cf85e3d3 | 34039 | 34780 | 742 | no |
| |||||||||||||||||||||||||||||||||||||||
cai | 34781 | 41931 | 7151 | yes |
| |||||||||||||||||||||||||||||||||||||||
121e64d2-6da7-4284-89cd-1db9d2fb5904 | 41932 | 42366 | 435 | no |
| |||||||||||||||||||||||||||||||||||||||
fix | 42367 | 45462 | 3096 | yes |
| |||||||||||||||||||||||||||||||||||||||
0c0a594e-1d60-46b1-b49e-e540d3e398f2 | 45463 | 50379 | 4917 | no |
| |||||||||||||||||||||||||||||||||||||||
surA-pdxA-ksgA-apaGH | 50380 | 54702 | 4323 | yes |
| |||||||||||||||||||||||||||||||||||||||
505b5b25-b3d3-4f10-9eef-c34ebe2122e1 | 54703 | 65854 | 11152 | no |
| |||||||||||||||||||||||||||||||||||||||
araBAD | 65855 | 70048 | 4194 | yes |
| |||||||||||||||||||||||||||||||||||||||
... | ... | ... | ... | ... | ... | |||||||||||||||||||||||||||||||||||||||
creABCD | 4633090 | 4637099 | 4010 | yes |
|
# Load the t-test result table from data/ttest.csv, resolved relative to the
# directory containing this file, into a Nyaplot DataFrame.
path = File.expand_path("../data/ttest.csv", __FILE__)
df_ttest = Nyaplot::DataFrame.from_csv(path)
name | statistic | dm | pvalue |
---|---|---|---|
aceA_b4015_st | -2.66477534128427 | -0.231420053466338 | 0.0372855953591475 |
alpA_b2624_st | -2.59430119116714 | -0.314091063837314 | 0.040974544243199 |
amn_b1982_st | 2.50075233999086 | 0.300768027982889 | 0.0464808997782366 |
ampE_b0111_st | 3.22741963510934 | 0.491886566026404 | 0.0179686874331823 |
aroA_b0908_st | -2.6281727788343 | -0.677562777505308 | 0.0391552776749673 |
arp_b4017_st | -3.09780159566518 | -0.203635693987595 | 0.0211758093340619 |
artI_b0863_st | 3.91803669738052 | 1.11440735214105 | 0.00781965224654806 |
artP_b0864_st | 3.03766268937103 | 0.767133218653141 | 0.0228714672660835 |
asd_b3433_st | 2.85224988311835 | 0.371215322942883 | 0.0290956715334605 |
aspC_b0928_st | -3.73505182820475 | -0.557002362309087 | 0.00967879356954946 |
atpB_b3738_st | -4.47240462758734 | -0.644712240554911 | 0.00422655258946847 |
atpD_b3732_st | -2.58770662044562 | -0.694185677478369 | 0.0413390790459209 |
atpF_b3736_st | -2.57794520925451 | -0.478497937246775 | 0.0418850057120005 |
atpG_b3733_st | -5.94520060825289 | -0.598274871270332 | 0.00101205103480581 |
atpI_b3739_st | -2.6778774179882 | -0.604098316763061 | 0.0366396699210827 |
b0836_st | 2.51445251943079 | 1.23976163226646 | 0.0456278087140453 |
... | ... | ... | ... |
ytfK_b4217_st | 7.81549357554911 | 2.64137844388281 | 0.000231537137148067 |
# Bare expression: evaluates to df_operon so the notebook displays the table.
df_operon
genename | geneannotation | operonname | operoncomments | probeset | locbegin | locend | |
---|---|---|---|---|---|---|---|
1 | thrA | aspartokinase I/homoserine dehydrogenase I | thr | threonine biosynthesis | thrA_b0002_st | 337 | 2799 |
2 | thrB | homoserine kinase | thr | threonine biosynthesis | thrB_b0003_st | 2801 | 3733 |
3 | thrC | threonine synthase | thr | threonine biosynthesis | thrC_b0004_st | 3734 | 5020 |
4 | dnaK | DNA biosynthesis; heat shock protein | dnaK | DNA biosynthesis | dnaK_b0014_st | 12163 | 14079 |
5 | dnaJ | DNA biosynthesis; heat shock protein | dnaK | DNA biosynthesis | dnaJ_b0015_st | 14168 | 15298 |
6 | yaaC | 35K hypothetical | ileS-lsp | tRNA synthetase & peptidase | ribF_b0025_st | 21407 | 22348 |
7 | ileS | isoleucine tRNA synthetase (EC 6.1.1.5) | ileS-lsp | tRNA synthetase & peptidase | ileS_b0026_st | 22391 | 25206 |
8 | lspA | prolipoprotein signal peptidase (SPaseII) (EC 3.4 99 35) | ileS-lsp | tRNA synthetase & peptidase | lspA_b0027_st | 25208 | 25701 |
9 | carA | carbamoyl-phosphate synthetase subunit A | carAB | carbamoyl-phosphate synthetase | carA_b0032_st | 29651 | 30799 |
10 | carB | carbamoyl-phosphate synthetase subunit B | carAB | carbamoyl-phosphate synthetase | carB_b0033_st | 30817 | 34038 |
11 | caiT | probable carnitine transporter | cai | probable carnitine operon | caiT_b0040_st | 40417 | 41931 |
12 | caiA | probable carnitine operon oxidoreductase CaiA | cai | probable carnitine operon | caiA_b0039_st | 39244 | 40386 |
13 | caiB | L-carnitine dehydratase | cai | probable carnitine operon | caiB_b0038_st | 37898 | 39115 |
14 | caiC | probable crotonobetaine/carnitine-CoA ligase | cai | probable carnitine operon | caiC_b0037_st | 36271 | 37839 |
15 | caiD | carnitine racemase | cai | probable carnitine operon | caiD_b0036_st | 35393 | 36270 |
16 | caiE | carnitine operon protein CaiE | cai | probable carnitine operon | caiE_b0035_st | 34781 | 35376 |
... | ... | ... | ... | ... | ... | ... | ... |
548 | creD | inner membrane protein CreD | creABCD | phosphate sensor | creD_b4400_st | 4635747 | 4637099 |
# Spot check: locbegin of the first operon row whose probeset is "aceA_b4015_st".
df_operon.filter { |row| row[:probeset] == "aceA_b4015_st" }.column(:locbegin).to_a.first
4214688
# Copy each t-test result into the 1000-position bin that contains the start
# coordinate of its probeset.
#
# For every row of `df_ttest`:
#   1. look up the locbegin of the first `df_operon` row with the same
#      probeset (rows without a match are skipped);
#   2. find the segment of `df_operon_info` whose begin..end range contains
#      that coordinate;
#   3. within that segment's nested frame, write :dm into :val and the
#      probeset name into :name for the bin [axis, axis + 1000).
#
# Segment bounds are inclusive and bins are half-open, so a gene that starts
# exactly at a segment or bin start (e.g. the first gene of an operon) is
# placed rather than silently dropped.
df_ttest.each_row do |ttest_row|
  probeset = ttest_row[:name]
  locbegin = df_operon.filter { |row| row[:probeset] == probeset }.column(:locbegin).to_a.first
  next if locbegin.nil?

  df_operon_info.each_row do |segment|
    next unless locbegin.between?(segment[:begin], segment[:end])

    segment[:df].each_row do |bin|
      bin_start = bin[:axis]
      next unless locbegin >= bin_start && locbegin < bin_start + 1000

      bin[:val]  = ttest_row[:dm]
      bin[:name] = ttest_row[:name]
    end
  end
end
df_operon_info
name | begin | end | size | if_operon | df | |||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
e17eaf65-8992-477d-afb3-68529cab2c82 | 0 | 336 | 337 | no |
| |||||||||||||||||||||||||||||||||||||||
thr | 337 | 5020 | 4684 | yes |
| |||||||||||||||||||||||||||||||||||||||
2095ab6d-0e82-4d9c-b643-b3f4bdf4cc23 | 5021 | 12162 | 7142 | no |
| |||||||||||||||||||||||||||||||||||||||
dnaK | 12163 | 15298 | 3136 | yes |
| |||||||||||||||||||||||||||||||||||||||
398c2464-338d-4fbd-b9ee-95b600f592c8 | 15299 | 21406 | 6108 | no |
| |||||||||||||||||||||||||||||||||||||||
ileS-lsp | 21407 | 25701 | 4295 | yes |
| |||||||||||||||||||||||||||||||||||||||
7d9bb493-9b36-4158-aace-15af8ca30370 | 25702 | 29650 | 3949 | no |
| |||||||||||||||||||||||||||||||||||||||
carAB | 29651 | 34038 | 4388 | yes |
| |||||||||||||||||||||||||||||||||||||||
5a6f8966-32de-46e1-bf6a-9f81cf85e3d3 | 34039 | 34780 | 742 | no |
| |||||||||||||||||||||||||||||||||||||||
cai | 34781 | 41931 | 7151 | yes |
| |||||||||||||||||||||||||||||||||||||||
121e64d2-6da7-4284-89cd-1db9d2fb5904 | 41932 | 42366 | 435 | no |
| |||||||||||||||||||||||||||||||||||||||
fix | 42367 | 45462 | 3096 | yes |
| |||||||||||||||||||||||||||||||||||||||
0c0a594e-1d60-46b1-b49e-e540d3e398f2 | 45463 | 50379 | 4917 | no |
| |||||||||||||||||||||||||||||||||||||||
surA-pdxA-ksgA-apaGH | 50380 | 54702 | 4323 | yes |
| |||||||||||||||||||||||||||||||||||||||
505b5b25-b3d3-4f10-9eef-c34ebe2122e1 | 54703 | 65854 | 11152 | no |
| |||||||||||||||||||||||||||||||||||||||
araBAD | 65855 | 70048 | 4194 | yes |
| |||||||||||||||||||||||||||||||||||||||
... | ... | ... | ... | ... | ... | |||||||||||||||||||||||||||||||||||||||
creABCD | 4633090 | 4637099 | 4010 | yes |
|
# Sanity check: print a marker for every segment whose nested frame contains
# a nil in its :val column.
df_operon_info.each_row do |segment|
  print "hoge" if segment[:df].val.to_a.any? { |val| val.nil? }
end
""
""
# Build and display a circular plot from the per-segment table.
# The constructor receives the table plus the :name and :df column labels.
# NOTE(review): presumably :name is the grouping column and :df the nested
# per-group data -- confirm against the Bionya/Nyaplot documentation.
plot = Nyaplot::CircularPlot.new(df_operon_info, :name, :df)
# Two-colour palette, used together with fill_by(:if_operon) below.
# NOTE(review): which colour maps to "no" vs "yes" is not visible here -- confirm.
plot.color(['#999999','#ef8a62'])
plot.fill_by(:if_operon)
# Layer 1: arcs drawn from the nested frames' :axis and :val columns.
arc = plot.add(1, :arc, :axis, :val)
arc.color(["#a50026"])
# Layer 2: text labels drawn from the nested frames' :axis and :name columns.
labels = plot.add(2, :labels, :axis, :name)
labels.text_size(0.5)
# NOTE(review): text_size is given the String "0" here but a Float for the
# labels layer above -- confirm the expected argument type.
plot.text_size("0")
plot.padding(0.2)
plot.show
# Bare expression: evaluates to the plot's group_by setting for display.
plot.group_by
:name