Re: ncl performance

From: Saji N. Hameed <saji_at_nyahnyahspammersnyahnyah>
Date: Tue, 16 Jun 2009 22:34:27 +0900

Hi Micah,

I recommend that you buy a customized server for your purpose. I just bought one
with much help from a colleague. Basically, I bought a reasonably fast processor
(Intel E8400, 3 GHz) and a good amount of RAM. As Dennis mentioned, it is important to
consider the speed of data movement, so I guess the specs of the RAM may also
be important. Mine is a G.SKILL DDR2 SDRAM 4GB (800MHz/PC6400PK). Everything
flies compared to our best PC-based servers at the office. (Because I could choose the
components for good computing performance, the cost was less than $1000.)

It may still be worth buying a dual or quad core, if you can write a scheduler
for submitting your jobs. My simple scheduler (in Ruby) to feed jobs is inlined
below, as an example. Here you assemble all the jobs and submit them.
Since it is a script, you have to keep manually feeding jobs. It will run jobs
as resources become available. An alternative is to run the scheduler as
a server process: your NCL script submits a job to the server, and the server
keeps pulling jobs out of a queue and running them as resources become available.

Best wishes with your new computer purchase.

saji

--
# Driver script: registers one compute node, builds three experiments over the
# same latitude/longitude grid, and hands everything to the scheduler.
require File.join(File.dirname(__FILE__),'../lib/experiment')

EXP_HOME="/home/saji/CloudClusters/LBMExp"

# Node arguments are (ip, user, password, number of CPUs).
nodes = Node.new('115.88.9.44','saji','xxxx',2)

lats = [-20,-10,0,10,20]
# Every 10 degrees from 0 to 360, both end points included.
lons = (0..360).step(10).to_a

Experiment.nodes(*nodes)

# Experiment.run discovers experiments through ObjectSpace, so the instances
# must stay referenced until it is called; the array below does that.
experiments = ["AUG53-00", "AUG79-00", "AUG53-78"].map do |basic_state|
  Experiment.new(basic_state,"frc1",lats,lons,11,EXP_HOME)
end

Experiment.run
--------------- experiment.rb
$:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
require 'scheduler.at_home'
# One experiment: a basic state and forcing type evaluated at every
# combination of the given longitudes and latitudes. Each combination becomes
# one Job. All experiments share a single class-level Scheduler.
class Experiment
  # Creates a fresh shared Scheduler and registers the given nodes with it.
  # Calling this again replaces the previous scheduler.
  #
  # @param nodes [Array] node objects passed one by one to Scheduler#add_node
  def self.nodes(*nodes)
    @@scheduler = Scheduler.new
    nodes.each { |node| @@scheduler.add_node(node) }
  end

  # @param basic_state  basic-state name, forwarded to Job.new
  # @param forcing_type forcing name, forwarded to Job.new
  # @param lats [Array<Numeric>] latitudes; negative values are rendered as "S"
  # @param lons [Array<Numeric>] longitudes; rendered with an "E" suffix
  # @param tend         forwarded unchanged to Job.new
  # @param root_dir     forwarded unchanged to Job.new
  def initialize(basic_state, forcing_type, lats, lons, tend, root_dir)
    @basic_state = basic_state
    @forcing_type = forcing_type
    @lats = lats
    @lons = lons
    @tend = tend
    @root_dir = root_dir
    @jobs = []
  end

  # @return [Array<String>] longitudes as labels, e.g. 80 -> "80E"
  def lons
    @lons.map { |l| "#{l}E" }
  end

  # @return [Array<String>] latitudes as labels, e.g. -10 -> "10S", 0 -> "0N"
  def lats
    @lats.map { |l| l < 0 ? "#{l.abs}S" : "#{l}N" }
  end

  # Builds one Job per (lon, lat) pair, longitudes in the outer loop.
  #
  # The list is rebuilt on every call. Previously each call appended another
  # full set to @jobs, so calling this twice (or running the experiments
  # twice) queued every job more than once.
  #
  # @return [Array<Job>]
  def jobs
    @jobs = lons.flat_map do |lon|
      lats.map do |lat|
        Job.new(@basic_state, @forcing_type, lat, lon, @tend, @root_dir)
      end
    end
  end

  # Queues the jobs of every live Experiment instance on the shared scheduler
  # and starts it. Experiment.nodes must have been called first.
  #
  # NOTE(review): instances are discovered with ObjectSpace.each_object, so an
  # experiment that is no longer referenced anywhere may already have been
  # garbage-collected and would be skipped.
  def self.run
    ObjectSpace.each_object(self) do |expt|
      @@scheduler.add_job(*expt.jobs)
    end
    @@scheduler.run
  end
end
----scheduler.at_home.rb
$:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
require 'jobs'
#require 'net/ssh'
require 'nodes'
require 'q'
require 'socket'
# Feeds queued jobs to a pool of nodes, starting each job in its own thread
# and moving jobs off a node's running list once they finish or fail.
class Scheduler
  attr_reader :q
  # The reader half of the former attr_accessor was shadowed by the
  # #free_nodes predicate below; only the writer is kept for compatibility.
  attr_writer :free_nodes

  def initialize
    @nodes = []
    @q = Q.new
  end

  # Registers one or more nodes with the pool.
  def add_node(*nodes)
    nodes.each { |node| @nodes << node }
  end

  # Appends one or more jobs to the queue.
  def add_job(*jobs)
    jobs.each { |job| q.add_job(job) }
  end

  # Picks the node that should receive the next job: the one with the fewest
  # jobs. The pool is rotated by one on every call so that equally loaded
  # nodes are chosen round-robin.
  #
  # The previous version looked up the winner with an index taken from a
  # filtered list (so it could return a different node than intended) and
  # left idle nodes out of the comparison entirely.
  #
  # @return the chosen node, or nil when no nodes are registered
  def winning_node
    return nil if @nodes.empty?

    # realigning nodes
    @nodes << @nodes.shift
    @nodes.min_by { |n| n.num_jobs }
  end

  # @return [Boolean] true when at least one node runs fewer jobs than it has
  #   CPUs (a node with num_jobs == num_cpus is full)
  def free_nodes
    @nodes.any? { |n| n.num_jobs < n.num_cpus }
  end

  # Serializes the job with Marshal into "<job.tmp_dir>/job", creating the
  # directory if needed. The node argument is currently unused.
  def marshall(job, node)
    dir = job.tmp_dir
    FileUtils.mkdir_p dir
    File.open("#{dir}/job", "w") { |f| Marshal.dump(job, f) }
  end

  # Returns the last element of the local address of a UDP socket connected
  # to a fixed address, with reverse DNS lookup disabled for the duration and
  # restored afterwards.
  def local_ip
    orig, Socket.do_not_reverse_lookup = Socket.do_not_reverse_lookup, true
    UDPSocket.open do |s|
      s.connect '210.98.49.22', 1
      s.addr.last
    end
  ensure
    Socket.do_not_reverse_lookup = orig
  end

  # Runs reaper.rb inside dir on the local machine and blocks until it exits.
  # Despite the name, ip, user and pass are unused in this "at home" variant.
  #
  # @return the result of Kernel#system
  def ssh(ip, user, pass, dir)
    print "submitting local job"
    system(" cd #{dir}; ruby reaper.rb ")
  end

  # Source text of the small script that loads the marshalled job from
  # work_dir and runs it.
  def reaper(work_dir, root_dir)
    %Q{
    #!/usr/bin/env ruby
    require '#{root_dir}/lib/jobs'
    job= Marshal.load(IO.read("#{work_dir}/job"))
    job.run
    }
  end

  # Records the job on the node, writes the marshalled job and reaper.rb into
  # the job's tmp_dir, then runs the reaper (blocking until it exits).
  def submit_job(job, node)
    puts "winner is #{node.ip}"
    puts "rundir #{job.tmp_dir}"
    node.add_job(job)
    marshall(job, node)
    dir = job.tmp_dir
    File.open("#{dir}/reaper.rb", "w") { |f| f.puts reaper(dir, job.root_dir) }
    ssh(node.ip, node.user, node.pw, dir)
  end

  # @return [Boolean] true when any node still has entries in its running list
  def jobs_are_running?
    @nodes.any? { |n| n.running_jobs? }
  end

  # Moves finished and failed jobs from each node's running list to its
  # finished_jobs / failed_jobs list; sleeps 2 seconds for each job that is
  # still running.
  def clear_jobs
    @nodes.each do |node|
      # Iterate over a copy: rm_job removes entries from running_jobs, and
      # deleting from the array being iterated makes #each skip elements.
      node.running_jobs.dup.each do |job|
        if job.finished?
          p "Job is over"
          node.finished_jobs << node.rm_job(job)
        elsif job.failed?
          p "Job failed"
          node.failed_jobs << node.rm_job(job)
        else
          sleep 2
        end
      end
    end
  end

  # Drains the queue: whenever a node has spare capacity, takes the next job
  # and (unless it already reports finished?) submits it in a new thread,
  # then waits 5 seconds. Joins all submission threads once the queue is
  # empty.
  def run
    Thread.abort_on_exception = false
    threads = []
    while q.length >= 1
      if free_nodes
        clear_jobs
        job = q.get_job
        unless job.finished?
          now = Time.now
          # Pass the job as a thread argument: `job` is shared by every
          # iteration of this while loop, so a block that merely closed over
          # it could observe a later job.
          threads << Thread.new(job) { |j| submit_job(j, winning_node) }
          puts "Time elapsed :: #{(Time.now-now)} secs"
          sleep 5
        end
      end
      clear_jobs if jobs_are_running?
    end
    threads.each { |t| t.join }
  end
end
#node1=Node.new('210.98.49.21','saji','xxxx',2)
#node2=Node.new('210.98.49.22','saji','xxxx',2)
#node3=Node.new('210.98.49.111','saji','xxxx',2)
#job1=Job.new("JJA53-00","frc1","20N","80E",1)
#job2=Job.new("JJA53-00","frc1","10N","80E",1)
#job3=Job.new("JJA53-00","frc1","0N","80E",1)
#job4=Job.new("JJA53-00","frc1","10S","80E",1)
#scheduler=Scheduler.new
#scheduler.add_node(node1,node2,node3)
#scheduler.add_job(job1,job2,job3,job4) #,job2,job3,job3,job2,job1,job3)
#scheduler.run
-----nodes.rb
# A compute node: connection details, CPU count, and bookkeeping lists for
# the jobs that are running, finished, or failed on it.
class Node
  attr_reader :ip, :user, :pw, :num_cpus, :running_jobs
  attr_accessor :failed_jobs, :finished_jobs
  attr_accessor :num_jobs

  # @param ip       address of the node
  # @param user     login name
  # @param pw       password
  # @param num_cpus number of CPUs on the node
  def initialize(ip, user, pw, num_cpus)
    @ip = ip
    @user = user
    @pw = pw
    @num_cpus = num_cpus
    @num_jobs = 0
    @finished_jobs = []
    @running_jobs = []
    @failed_jobs = []
  end

  # Records a job as running on this node.
  #
  # @return [Integer] the updated number of jobs
  def add_job(job)
    @running_jobs << job
    @num_jobs += 1
  end

  # @return [Integer, nil] position of the job in the running list
  def job_index(job)
    @running_jobs.index(job)
  end

  # Removes a job from the running list and decrements the job count.
  #
  # Returns the removed job, so callers can append it directly to
  # finished_jobs / failed_jobs (it used to return the decremented counter).
  # A job that is not in the running list leaves the node untouched.
  #
  # @return the removed job, or nil if it was not running here
  def rm_job(job)
    idx = job_index(job)
    return nil if idx.nil?

    removed = @running_jobs.delete_at(idx)
    @num_jobs -= 1
    removed
  end

  # @return [Boolean] true when at least one job is in the running list
  def running_jobs?
    !@running_jobs.empty?
  end
end
--- jobs.rb
require 'fileutils'
# One model run for a single basic state, forcing and (lat, lon) label.
# Knows its directory layout under root_dir, writes the namelist input files,
# runs the two external programs and reports its state from the filesystem.
class Job
  attr_reader :root_dir

  # @param bs       basic-state name (used in paths; upcased for exp_home)
  # @param fs       forcing name (used in paths; upcased for exp_home)
  # @param lat      latitude label, e.g. "20N"
  # @param lon      longitude label, e.g. "80E"
  # @param tend     interpolated into the `end=` field of the &nmtime namelist
  # @param root_dir directory that holds bin/, bs/, the forcing directory and
  #   the per-experiment directories
  def initialize(bs, fs, lat, lon, tend, root_dir)
    @bs = bs
    @fs = fs
    @lat = lat
    @lon = lon
    @tend = tend
    @root_dir = root_dir
  end

  # Per-experiment directory: <root_dir>/<BS>_<FS>.
  def exp_home
    "#{@root_dir}/#{@bs.upcase}_#{@fs.upcase}"
  end

  # Scratch directory of this job; its existence is what #running? checks.
  def tmp_dir
    "#{exp_home}/tmp_#{@fs}#{@lon}#{@lat}"
  end

  # Directory that receives the final output file.
  def out_dir
    "#{exp_home}/out"
  end

  # Name of the final output file; its existence is what #outfil? checks.
  def out_fil
    "model.#{@lon}#{@lat}.grd"
  end

  # Path of the basic-state file.
  def bs_fil
    "#{@root_dir}/bs/#{@bs}"
  end

  # Directory holding the forcing files.
  def frc_dir
    "#{@root_dir}/#{@fs}"
  end

  # Path of the forcing file for this job's location.
  def fs_fil
    "#{frc_dir}/frc.#{@lon}#{@lat}.grd"
  end

  # Creates exp_home, tmp_dir and out_dir (no error if they already exist).
  def mk_dirs
    FileUtils.mkdir_p exp_home
    FileUtils.mkdir_p tmp_dir
    FileUtils.mkdir_p out_dir
  end

  # Namelist text written to SYSIN and fed to the lbm executable on stdin.
  def namelist
    %Q{
    &nmrun  run='linear model'                                 &end
    &nmtime start=0,1,1,0,0,0, end=0,1,#{@tend},0,0,0          &end
    &nmhdif order=4, tefold=0.0833, tunit='DAY'                &end
    &nmdelt delt=20, tunit='MIN', inistp=2                     &end
    &nmdamp ddragv=0.5,0.5,0.5,5,30,30,30,30,30,30,30,30,30,30,30,30,30,30,1,0.5,
        ddragd=0.5,0.5,0.5,5,30,30,30,30,30,30,30,30,30,30,30,30,30,30,1,0.5,
        ddragt=0.5,0.5,0.5,5,30,30,30,30,30,30,30,30,30,30,30,30,30,30,1,0.5,
        tunit='DAY'                                            &end
    &nminit file='#{bs_fil}' , DTBFR=0., DTAFTR=0., TUNIT='DAY' &end
    &nmrstr file='#{tmp_dir}/Restart.amat', tintv=1, tunit='MON',  overwt=t    &end
 
    &nmvdif vdifv=1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,
        vdifd=1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,
        vdift=1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,                         &end
    &nmbtdif tdmpc=0.                                           &end
    &nmfrc  ffrc='#{fs_fil}',   oper=f, nfcs=1                       &end
    &nmsfrc fsfrc='#{fs_fil}', ofrc=t, nsfcs=1, fsend=0,1,10,0,0,0 &end
 
    &nmchck ocheck=f, ockall=f                                  &end
    &nmdata item='GRZ',    file=' '                             &end
 
    &nmhisd tintv=1, tavrg=1, tunit='DAY'                       &end
    &nmhist item='PSI',  file='psi', tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='CHI',  file='chi', tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='U',    file='u',   tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='V',    file='v',   tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='OMGF', file='w',   tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='T',    file='t',   tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='Z',    file='z',   tintv=1, tavrg=1, tunit='DAY' &end
    &nmhist item='PS',   file='p',   tintv=1, tavrg=1, tunit='DAY' &end
    }
  end

  # Namelist text written to SETPAR before the gt2gr executable is run; it
  # points at the history files in tmp_dir and the output file in out_dir.
  def setpar
    %Q{
    &nmfgt cfs='#{tmp_dir}/psi',
        cfc='#{tmp_dir}/chi',
        cfu='#{tmp_dir}/u',
        cfv='#{tmp_dir}/v',
        cfw='#{tmp_dir}/w',
        cft='#{tmp_dir}/t',
        cfz='#{tmp_dir}/z',
        cfp='#{tmp_dir}/p',
        cfq='#{tmp_dir}/q',
        cftc='#{tmp_dir}/dtc',
        cfqc='#{tmp_dir}/dqc',
        cftl='#{tmp_dir}/dtl',
        cfql='#{tmp_dir}/dql',
        cfpr='#{tmp_dir}/pr',
        cfo='#{out_dir}/#{out_fil}',
        fact=1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
        opl=t,
    &end
 
    &nmbs  cbs0 = '#{bs_fil}'
        cbs  = '#{bs_fil}.grd'
    &end
    &nmall owall=t
    &end
 
    &nmcls oclassic=t
    &end
      }
  end

  # Path of the model executable.
  def lbm
    "#{@root_dir}/bin/lbm2.t42ml20ctintgr"
  end

  # Path of the post-processing executable.
  def gt2gr
    "#{@root_dir}/bin/gt2gr"
  end

  # Runs the model inside tmp_dir, reading SYSIN and appending to SYSOUT.
  #
  # @return the result of Kernel#system
  def run_lbm
    Dir.chdir(tmp_dir) do
      system("#{lbm} < SYSIN >> SYSOUT")
    end
  end

  # Runs gt2gr inside tmp_dir.
  #
  # @return the result of Kernel#system
  def run_gt2gr
    Dir.chdir(tmp_dir) do
      system(gt2gr)
    end
  end

  # Writes SETPAR into tmp_dir and runs gt2gr.
  def post_process
    File.open("#{tmp_dir}/SETPAR", "w") { |f| f.puts(setpar) }
    run_gt2gr
  end

  # Runs the whole job unless its output file already exists: creates the
  # directories, writes SYSIN, runs the model and the post-processing, then
  # removes tmp_dir.
  #
  # The directories are created before the "running" marker is touched; the
  # marker used to be touched first, which raised Errno::ENOENT whenever
  # tmp_dir had not already been created by someone else.
  def run
    return if outfil?

    mk_dirs
    FileUtils.touch("#{tmp_dir}/running")
    File.open("#{tmp_dir}/SYSIN", "w") { |f| f.puts(namelist) }
    run_lbm
    post_process
    FileUtils.remove_dir tmp_dir
  end

  # @return [Boolean] true while tmp_dir exists
  def running?
    File.exist? tmp_dir
  end

  # @return [Boolean] true when the output file exists
  def outfil?
    File.exist? "#{out_dir}/#{out_fil}"
  end

  # @return [Boolean] true when the output file exists and tmp_dir is gone
  def finished?
    outfil? && !running?
  end

  # @return [Boolean] true when there is neither an output file nor a tmp_dir
  def failed?
    !finished? && !running?
  end
end
#job1=Job.new("JJA","frc1",11,1)
#job1.run
#job2=Job.new("JJA53-00","frc1","20N","90E",1.5)
#job2.run
-- 
Saji N. Hameed
APEC Climate Center          				
1463 U-dong, Haeundae-gu,                               +82 51 745 3951
BUSAN 612-020, KOREA                    		saji_at_apcc21.net
Fax: +82-51-745-3999
_______________________________________________
ncl-talk mailing list
List instructions, subscriber options, unsubscribe:
http://mailman.ucar.edu/mailman/listinfo/ncl-talk
Received on Tue Jun 16 2009 - 07:34:27 MDT

This archive was generated by hypermail 2.2.0 : Tue Jun 16 2009 - 10:05:45 MDT