In [1]:
require 'nyaplot'
require 'bionya'
Out[1]:
Out[1]:
Out[1]:
true
In [2]:
# Load the operon annotation table into a Nyaplot data frame.
# The CSV path is "data/operon.csv" resolved against the parent of __FILE__.
# NOTE(review): inside a notebook session __FILE__ may not be a real file path;
# confirm the directory this actually resolves to.
path = File.expand_path("../data/operon.csv", __FILE__)
df_operon = Nyaplot::DataFrame.from_csv(path)
Out[2]:
genenamegeneannotationoperonnameoperoncommentsprobesetlocbeginlocend
1thrAaspartokinase I/homoserine dehydrogenase Ithrthreonine biosynthesisthrA_b0002_st3372799
2thrBhomoserine kinasethrthreonine biosynthesisthrB_b0003_st28013733
3thrCthreonine synthasethrthreonine biosynthesisthrC_b0004_st37345020
4dnaKDNA biosynthesis; heat shock proteindnaKDNA biosynthesisdnaK_b0014_st1216314079
5dnaJDNA biosynthesis; heat shock proteindnaKDNA biosynthesisdnaJ_b0015_st1416815298
6yaaC35K hypotheticalileS-lsptRNA synthetase & peptidaseribF_b0025_st2140722348
7ileSisoleucine tRNA synthetase (EC 6.1.1.5)ileS-lsptRNA synthetase & peptidaseileS_b0026_st2239125206
8lspAprolipoprotein signal peptidase (SPaseII) (EC 3.4 99 35)ileS-lsptRNA synthetase & peptidaselspA_b0027_st2520825701
9carAcarbamoyl-phosphate synthetase subunit AcarABcarbamoyl-phosphate synthetasecarA_b0032_st2965130799
10carBcarbamoyl-phosphate synthetase subunit BcarABcarbamoyl-phosphate synthetasecarB_b0033_st3081734038
11caiTprobable carnitine transportercaiprobable carnitine operoncaiT_b0040_st4041741931
12caiAprobable carnitine operon oxidoreductase CaiAcaiprobable carnitine operoncaiA_b0039_st3924440386
13caiBL-carnitine dehydratasecaiprobable carnitine operoncaiB_b0038_st3789839115
14caiCprobable crotonobetaine/carnitine-CoA ligasecaiprobable carnitine operoncaiC_b0037_st3627137839
15caiDcarnitine racemasecaiprobable carnitine operoncaiD_b0036_st3539336270
16caiEcarnitine operon protein CaiEcaiprobable carnitine operoncaiE_b0035_st3478135376
........................
548creDinner membrane protein CreDcreABCDphosphate sensorcreD_b4400_st46357474637099
In [3]:
# Build df_operon_info: one row per operon (if_operon == "yes") plus one row for
# every gap between the end of the previous segment and the start of the next
# operon (if_operon == "no"). Gap rows get a random UUID as their name.
#
# Columns: name, begin, end, size (end - begin + 1), if_operon.
require 'securerandom' # SecureRandom.uuid is used below; do not rely on another library loading it

begin_arr = []; end_arr = []; size_arr = []; if_operon_arr = []; name_arr = []

# Discard rows whose coordinates are Strings so that min/max and the
# arithmetic below only ever see non-String values.
df_operon.filter! { |row| !(row[:locbegin].is_a?(String) || row[:locend].is_a?(String)) }

df_operon.column(:operonname).to_a.uniq.each do |name|
  df_part = df_operon.filter { |row| row[:operonname].to_s == name.to_s }
  operon_begin = df_part.column(:locbegin).to_a.min
  operon_end = df_part.column(:locend).to_a.max

  # End coordinate of the previously emitted segment. Before the first operon
  # there is none, so use -1: the leading gap then starts at position 0 and is
  # derived from the data instead of being hard-coded.
  prev_end = end_arr.last || -1

  # Emit a filler segment when the operon does not start right after the
  # previous segment.
  if operon_begin - prev_end > 1
    name_arr.push(SecureRandom.uuid)
    begin_arr.push(prev_end + 1)
    end_arr.push(operon_begin - 1)
    size_arr.push(operon_begin - prev_end - 1)
    if_operon_arr.push("no")
  end

  name_arr.push(name)
  begin_arr.push(operon_begin)
  end_arr.push(operon_end)
  size_arr.push(operon_end - operon_begin + 1)
  if_operon_arr.push("yes")
end
df_operon_info = Nyaplot::DataFrame.new({name: name_arr, begin: begin_arr, end: end_arr, size: size_arr, if_operon: if_operon_arr})
df_operon_info
Out[3]:
namebeginendsizeif_operon
e17eaf65-8992-477d-afb3-68529cab2c820336337no
thr33750204684yes
2095ab6d-0e82-4d9c-b643-b3f4bdf4cc235021121627142no
dnaK12163152983136yes
398c2464-338d-4fbd-b9ee-95b600f592c815299214066108no
ileS-lsp21407257014295yes
7d9bb493-9b36-4158-aace-15af8ca3037025702296503949no
carAB29651340384388yes
5a6f8966-32de-46e1-bf6a-9f81cf85e3d33403934780742no
cai34781419317151yes
121e64d2-6da7-4284-89cd-1db9d2fb59044193242366435no
fix42367454623096yes
0c0a594e-1d60-46b1-b49e-e540d3e398f245463503794917no
surA-pdxA-ksgA-apaGH50380547024323yes
505b5b25-b3d3-4f10-9eef-c34ebe2122e1547036585411152no
araBAD65855700484194yes
...............
creABCD463309046370994010yes
In [4]:
# Attach a nested data frame to every segment of df_operon_info.
# Each nested frame has one row per 1000-unit step from the segment's begin up
# to (at most) its end, with :val initialised to 0 and :name to ''.
df_arr = []
df_operon_info.each_row do |row|
  axis_arr = row[:begin].step(row[:end], 1000).to_a
  bins = axis_arr.length
  val_arr = Array.new(bins, 0)
  name_arr = Array.new(bins, '')
  df_arr << Nyaplot::DataFrame.new({axis: axis_arr, val: val_arr, name: name_arr})
end
# Store the nested frames as a new :df column and show the result.
df_operon_info.df = df_arr
df_operon_info
Out[4]:
namebeginendsizeif_operondf
e17eaf65-8992-477d-afb3-68529cab2c820336337no
axisvalname
00
thr33750204684yes
axisvalname
3370
13370
23370
33370
43370
2095ab6d-0e82-4d9c-b643-b3f4bdf4cc235021121627142no
axisvalname
50210
60210
70210
80210
90210
100210
110210
120210
dnaK12163152983136yes
axisvalname
121630
131630
141630
151630
398c2464-338d-4fbd-b9ee-95b600f592c815299214066108no
axisvalname
152990
162990
172990
182990
192990
202990
212990
ileS-lsp21407257014295yes
axisvalname
214070
224070
234070
244070
254070
7d9bb493-9b36-4158-aace-15af8ca3037025702296503949no
axisvalname
257020
267020
277020
287020
carAB29651340384388yes
axisvalname
296510
306510
316510
326510
336510
5a6f8966-32de-46e1-bf6a-9f81cf85e3d33403934780742no
axisvalname
340390
cai34781419317151yes
axisvalname
347810
357810
367810
377810
387810
397810
407810
417810
121e64d2-6da7-4284-89cd-1db9d2fb59044193242366435no
axisvalname
419320
fix42367454623096yes
axisvalname
423670
433670
443670
453670
0c0a594e-1d60-46b1-b49e-e540d3e398f245463503794917no
axisvalname
454630
464630
474630
484630
494630
surA-pdxA-ksgA-apaGH50380547024323yes
axisvalname
503800
513800
523800
533800
543800
505b5b25-b3d3-4f10-9eef-c34ebe2122e1547036585411152no
axisvalname
547030
557030
567030
577030
587030
597030
607030
617030
627030
637030
647030
657030
araBAD65855700484194yes
axisvalname
658550
668550
678550
688550
698550
..................
creABCD463309046370994010yes
axisvalname
46330900
46340900
46350900
46360900
46370900
In [5]:
# Load the t-test results into a Nyaplot data frame.
# The CSV path is "data/ttest.csv" resolved against the parent of __FILE__.
# NOTE(review): inside a notebook session __FILE__ may not be a real file path;
# confirm the directory this actually resolves to.
path = File.expand_path("../data/ttest.csv", __FILE__)
df_ttest = Nyaplot::DataFrame.from_csv(path)
Out[5]:
namestatisticdmpvalue
aceA_b4015_st-2.66477534128427-0.2314200534663380.0372855953591475
alpA_b2624_st-2.59430119116714-0.3140910638373140.040974544243199
amn_b1982_st2.500752339990860.3007680279828890.0464808997782366
ampE_b0111_st3.227419635109340.4918865660264040.0179686874331823
aroA_b0908_st-2.6281727788343-0.6775627775053080.0391552776749673
arp_b4017_st-3.09780159566518-0.2036356939875950.0211758093340619
artI_b0863_st3.918036697380521.114407352141050.00781965224654806
artP_b0864_st3.037662689371030.7671332186531410.0228714672660835
asd_b3433_st2.852249883118350.3712153229428830.0290956715334605
aspC_b0928_st-3.73505182820475-0.5570023623090870.00967879356954946
atpB_b3738_st-4.47240462758734-0.6447122405549110.00422655258946847
atpD_b3732_st-2.58770662044562-0.6941856774783690.0413390790459209
atpF_b3736_st-2.57794520925451-0.4784979372467750.0418850057120005
atpG_b3733_st-5.94520060825289-0.5982748712703320.00101205103480581
atpI_b3739_st-2.6778774179882-0.6040983167630610.0366396699210827
b0836_st2.514452519430791.239761632266460.0456278087140453
............
ytfK_b4217_st7.815493575549112.641378443882810.000231537137148067
In [6]:
# Display df_operon again; by now it has been filtered in place (filter!) to
# drop rows whose locbegin/locend are Strings.
df_operon
Out[6]:
genenamegeneannotationoperonnameoperoncommentsprobesetlocbeginlocend
1thrAaspartokinase I/homoserine dehydrogenase Ithrthreonine biosynthesisthrA_b0002_st3372799
2thrBhomoserine kinasethrthreonine biosynthesisthrB_b0003_st28013733
3thrCthreonine synthasethrthreonine biosynthesisthrC_b0004_st37345020
4dnaKDNA biosynthesis; heat shock proteindnaKDNA biosynthesisdnaK_b0014_st1216314079
5dnaJDNA biosynthesis; heat shock proteindnaKDNA biosynthesisdnaJ_b0015_st1416815298
6yaaC35K hypotheticalileS-lsptRNA synthetase & peptidaseribF_b0025_st2140722348
7ileSisoleucine tRNA synthetase (EC 6.1.1.5)ileS-lsptRNA synthetase & peptidaseileS_b0026_st2239125206
8lspAprolipoprotein signal peptidase (SPaseII) (EC 3.4 99 35)ileS-lsptRNA synthetase & peptidaselspA_b0027_st2520825701
9carAcarbamoyl-phosphate synthetase subunit AcarABcarbamoyl-phosphate synthetasecarA_b0032_st2965130799
10carBcarbamoyl-phosphate synthetase subunit BcarABcarbamoyl-phosphate synthetasecarB_b0033_st3081734038
11caiTprobable carnitine transportercaiprobable carnitine operoncaiT_b0040_st4041741931
12caiAprobable carnitine operon oxidoreductase CaiAcaiprobable carnitine operoncaiA_b0039_st3924440386
13caiBL-carnitine dehydratasecaiprobable carnitine operoncaiB_b0038_st3789839115
14caiCprobable crotonobetaine/carnitine-CoA ligasecaiprobable carnitine operoncaiC_b0037_st3627137839
15caiDcarnitine racemasecaiprobable carnitine operoncaiD_b0036_st3539336270
16caiEcarnitine operon protein CaiEcaiprobable carnitine operoncaiE_b0035_st3478135376
........................
548creDinner membrane protein CreDcreABCDphosphate sensorcreD_b4400_st46357474637099
In [7]:
# Spot check: start coordinate (locbegin) of the first row whose probeset is
# "aceA_b4015_st"; nil when no row matches.
df_operon
  .filter { |row| row[:probeset] == "aceA_b4015_st" }
  .column(:locbegin)
  .to_a
  .first
Out[7]:
4214688
In [8]:
# Map every t-test result onto the genome bin that contains its gene start.
#
# For each row of df_ttest, the probe set name is looked up in df_operon to get
# the gene's locbegin. The segment of df_operon_info covering that position is
# located, and the matching 1000-wide bin of its nested frame receives the
# row's :dm value and the probe set name.
#
# Boundaries are inclusive on the segment ([begin, end]) and half-open on the
# bin ([axis, axis + 1000)). Strict comparisons would silently drop every gene
# that starts exactly at a segment begin (the first gene of each operon) or
# exactly on a bin start.

# probeset => locbegin, built once instead of re-filtering df_operon for every
# t-test row. The first occurrence wins, like taking element 0 of a filter.
locbegin_by_probeset = {}
df_operon.each_row do |row|
  key = row[:probeset]
  locbegin_by_probeset[key] = row[:locbegin] unless locbegin_by_probeset.key?(key)
end

df_ttest.each_row do |row1|
  locbegin = locbegin_by_probeset[row1[:name]]
  next if locbegin.nil? # probe set not present in the operon table

  df_operon_info.each_row do |row|
    next unless locbegin.between?(row[:begin], row[:end])

    row[:df].each_row do |nest_row|
      bin_start = nest_row[:axis]
      next unless locbegin >= bin_start && locbegin < bin_start + 1000

      nest_row[:val] = row1[:dm]
      nest_row[:name] = row1[:name]
    end
  end
end
df_operon_info
Out[8]:
namebeginendsizeif_operondf
e17eaf65-8992-477d-afb3-68529cab2c820336337no
axisvalname
00
thr33750204684yes
axisvalname
3370
13370
23370
33370
43370
2095ab6d-0e82-4d9c-b643-b3f4bdf4cc235021121627142no
axisvalname
50210
60210
70210
80210
90210
100210
110210
120210
dnaK12163152983136yes
axisvalname
121630
131630
141630
151630
398c2464-338d-4fbd-b9ee-95b600f592c815299214066108no
axisvalname
152990
162990
172990
182990
192990
202990
212990
ileS-lsp21407257014295yes
axisvalname
214070
224070
234070
244070
254070
7d9bb493-9b36-4158-aace-15af8ca3037025702296503949no
axisvalname
257020
267020
277020
287020
carAB29651340384388yes
axisvalname
296510
306510
316510
326510
336510
5a6f8966-32de-46e1-bf6a-9f81cf85e3d33403934780742no
axisvalname
340390
cai34781419317151yes
axisvalname
347810
357810
367810
377810
387810
397810
407810
417810
121e64d2-6da7-4284-89cd-1db9d2fb59044193242366435no
axisvalname
419320
fix42367454623096yes
axisvalname
423670
433670
443670
453670
0c0a594e-1d60-46b1-b49e-e540d3e398f245463503794917no
axisvalname
454630
464630
474630
484630
494630
surA-pdxA-ksgA-apaGH50380547024323yes
axisvalname
503800
513800
523800
533800
543800
505b5b25-b3d3-4f10-9eef-c34ebe2122e1547036585411152no
axisvalname
547030
557030
567030
577030
587030
597030
607030
617030
627030
637030
647030
657030
araBAD65855700484194yes
axisvalname
658550
668550
678550
688550
698550
..................
creABCD463309046370994010yes
axisvalname
46330900
46340900
46350900
46360900
46370900
In [9]:
# Sanity check: print "hoge" once for every segment whose nested frame has a
# nil in its :val column. Printing nothing means no nil was found.
df_operon_info.each_row do |row|
  print "hoge" if row[:df].val.to_a.any? { |val| val.nil? }
end
# Make the cell evaluate to an empty string rather than each_row's return value.
""
Out[9]:
""
In [10]:
# Draw the segments of df_operon_info as a circular plot.
# :name is the grouping column (plot.group_by returns :name) and :df is the
# column holding the nested per-segment frames.
plot = Nyaplot::CircularPlot.new(df_operon_info, :name, :df)
# Two colours, presumably one per value of :if_operon ("no"/"yes") because of
# the fill_by call below. TODO confirm which colour maps to which value.
plot.color(['#999999','#ef8a62'])
plot.fill_by(:if_operon)
# Add an :arc glyph reading :axis and :val from the nested frames.
# NOTE(review): the leading 1 looks like a layer/track index; confirm against Bionya.
arc = plot.add(1, :arc, :axis, :val)
arc.color(["#a50026"])
# Add a :labels glyph reading :axis and :name from the nested frames
# (leading 2: presumably the next layer/track; confirm).
labels = plot.add(2, :labels, :axis, :name)
labels.text_size(0.5)
# NOTE(review): a String "0" is passed here while labels.text_size gets a
# Float; presumably this hides the plot-level (segment name) text. Confirm.
plot.text_size("0")
plot.padding(0.2)
plot.show
Out[10]:
In [11]:
# Inspect the plot's grouping column; the recorded output is :name, the second
# argument given to CircularPlot.new.
plot.group_by
Out[11]:
:name