Hi Micah,
I recommend that you buy a custom-built server for your purpose. I just bought one
with much help from a colleague. Basically I bought a reasonably fast processor
(Intel E8400, 3 GHz) and a good amount of RAM. As Dennis mentioned, it is important to
consider the speed of data movement, so I guess the specs of the RAM may also
be important. Mine is a G.SKILL DDR2 SDRAM 4GB (800MHz/PC6400PK). Everything
flies compared to our best PC-based servers at the office. (Because I could choose the
components for good computing performance, the cost was less than $1000.)
It may still be worth buying a dual or quad core, if you can write a scheduler
for submitting your jobs. My simple scheduler (in Ruby) to feed jobs is inlined
below, as an example. Here you assemble all the jobs and submit them.
Since it is a script, you have to keep feeding jobs manually. It will run jobs
as resources become available. An alternative is to run the scheduler as
a server process. Your NCL script would submit a job to the server, which would
keep pulling jobs out of a queue as resources become available and run them.
Best wishes with your new computer purchase.
saji
--
# Driver script: registers one compute node, describes three experiments that
# share the same latitude/longitude grid, and starts the scheduler.
require File.join(File.dirname(__FILE__), '../lib/experiment')

EXP_HOME = "/home/saji/CloudClusters/LBMExp"

# Arguments are ip, user, password and number of CPUs.
node = Node.new('115.88.9.44', 'saji', 'xxxx', 2)

lats = [-20, -10, 0, 10, 20]
# NOTE(review): the range is inclusive, so both 0 and 360 are generated;
# confirm that running both the 0E and the 360E case is intended.
lons = (0..360).step(10).to_a

Experiment.nodes(node)

# Each Experiment.new describes one experiment; Experiment.run then runs all of them.
exp1 = Experiment.new("AUG53-00", "frc1", lats, lons, 11, EXP_HOME)
exp2 = Experiment.new("AUG79-00", "frc1", lats, lons, 11, EXP_HOME)
exp3 = Experiment.new("AUG53-78", "frc1", lats, lons, 11, EXP_HOME)

Experiment.run
--------------- experiment.rb
$:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
require 'scheduler.at_home'
# One experiment: a basic state and forcing type evaluated at every combination
# of the given latitudes and longitudes. Each combination becomes one Job.
#
# Class-level usage: call Experiment.nodes once to set up the scheduler, create
# any number of experiments, then call Experiment.run.
class Experiment
  # Every experiment created so far, in creation order. Kept explicitly so that
  # Experiment.run does not depend on ObjectSpace enumeration (whose order is
  # undefined) to find the instances.
  @@experiments = []

  # Creates a fresh scheduler and registers the given nodes with it.
  # Calling this again replaces the previous scheduler.
  def self.nodes(*nodes)
    @@scheduler = Scheduler.new
    nodes.each { |node| @@scheduler.add_node(node) }
  end

  # basic_state:: name of the basic state (passed to Job as its first argument)
  # forcing_type:: name of the forcing (passed to Job as its second argument)
  # lats:: numeric latitudes; negative values are rendered with an "S" suffix
  # lons:: numeric longitudes; rendered with an "E" suffix
  # tend:: passed through unchanged to every Job
  # root_dir:: passed through unchanged to every Job
  def initialize(basic_state, forcing_type, lats, lons, tend, root_dir)
    @basic_state = basic_state
    @forcing_type = forcing_type
    @lats = lats
    @lons = lons
    @tend = tend
    @root_dir = root_dir
    @jobs = []
    @@experiments << self
  end

  # Longitudes as labels, e.g. 90 -> "90E".
  def lons
    @lons.map { |lon| "#{lon}E" }
  end

  # Latitudes as labels, e.g. -10 -> "10S", 0 -> "0N", 20 -> "20N".
  def lats
    @lats.map { |lat| lat < 0 ? "#{lat.abs}S" : "#{lat}N" }
  end

  # Returns one Job per (longitude, latitude) pair, longitudes varying slowest.
  # The list is built on the first call and reused afterwards, so repeated
  # calls neither duplicate jobs nor create new Job objects.
  def jobs
    return @jobs unless @jobs.empty?

    @jobs = lons.product(lats).map do |lon, lat|
      Job.new(@basic_state, @forcing_type, lat, lon, @tend, @root_dir)
    end
  end

  # Queues the jobs of every experiment of this class (or a subclass) on the
  # scheduler and runs it. Raises if Experiment.nodes has not been called.
  def self.run
    unless defined?(@@scheduler)
      raise "Experiment.nodes must be called before Experiment.run"
    end

    @@experiments.grep(self).each do |expt|
      @@scheduler.add_job(*expt.jobs)
    end
    @@scheduler.run
  end
end
----scheduler.at_home.rb
$:.unshift File.expand_path(File.join(File.dirname(__FILE__)))
require 'jobs'
#require 'net/ssh'
require 'nodes'
require 'q'
require 'socket'
# Feeds queued jobs to a set of nodes, never giving a node more concurrent jobs
# than it has CPUs. Jobs are launched from worker threads; in this "at home"
# variant every job is executed on the local machine (see #ssh).
#
# Collaborators defined elsewhere: Q (job queue responding to add_job, get_job
# and length), Node and Job.
class Scheduler
  attr_reader :q
  # Writer retained for backward compatibility; the reader is the computed
  # #free_nodes predicate below.
  attr_writer :free_nodes

  def initialize
    @nodes = []
    @q = Q.new
  end

  # Registers one or more nodes.
  def add_node(*nodes)
    nodes.each { |node| @nodes << node }
  end

  # Queues one or more jobs.
  def add_job(*jobs)
    jobs.each { |job| q.add_job(job) }
  end

  # Picks the node that should receive the next job: the node with the fewest
  # running jobs among those that still have a free CPU. The node list is
  # rotated on every call so that ties are broken round-robin. If every node is
  # full the first node (after rotation) is returned; with no nodes, nil.
  def winning_node
    return nil if @nodes.empty?

    # realigning nodes
    @nodes << @nodes.shift
    candidates = @nodes.select { |n| n.num_jobs < n.num_cpus }
    return @nodes.first if candidates.empty?

    candidates.min_by { |n| n.num_jobs }
  end

  # True when at least one node is running fewer jobs than it has CPUs.
  def free_nodes
    @nodes.any? { |n| n.num_jobs < n.num_cpus }
  end

  # Serialises the job into "<job.tmp_dir>/job", creating the directory first.
  # The node argument is currently unused.
  def marshall(job, node)
    dir = job.tmp_dir
    FileUtils.mkdir_p dir
    # Marshal output is binary data, hence "wb".
    File.open("#{dir}/job", "wb") { |f| Marshal.dump(job, f) }
  end

  # Returns the last element of the local address info of a UDP socket
  # connected towards a fixed remote address. Reverse DNS lookup is switched
  # off for the duration of the call and restored afterwards.
  def local_ip
    orig, Socket.do_not_reverse_lookup = Socket.do_not_reverse_lookup, true
    UDPSocket.open do |s|
      s.connect '210.98.49.22', 1
      s.addr.last
    end
  ensure
    Socket.do_not_reverse_lookup = orig
  end

  # Runs reaper.rb inside dir on the local machine and waits for it to finish.
  # ip, user and pass are accepted but unused here.
  def ssh(ip, user, pass, dir)
    print "submitting local job"
    # "&&" so that reaper.rb is not run from the wrong directory if cd fails.
    system(" cd #{dir} && ruby reaper.rb ")
  end

  # Source text of the small script that loads the marshalled job from
  # work_dir and runs it.
  # NOTE(review): the script uses Marshal.load; that is only safe because the
  # file it reads is written by #marshall.
  def reaper(work_dir, root_dir)
    %Q{
#!/usr/bin/env ruby
require '#{root_dir}/lib/jobs'
job= Marshal.load(IO.read("#{work_dir}/job"))
job.run
}
  end

  # Prepares the job's working directory, registers the job on the node and
  # runs it (blocking until it is done).
  def submit_job(job, node)
    puts "winner is #{node.ip}"
    puts "rundir #{job.tmp_dir}"
    # Create the working directory before registering the job: Job#failed? is
    # true while neither the output file nor tmp_dir exists, so a job that is
    # registered first could be reaped as failed by a concurrent #clear_jobs.
    marshall(job, node)
    dir = job.tmp_dir
    File.open("#{dir}/reaper.rb", "w") { |f| f.puts reaper(dir, job.root_dir) }
    node.add_job(job)
    ssh(node.ip, node.user, node.pw, dir)
  end

  # True when any node still has running jobs.
  def jobs_are_running?
    @nodes.any? { |n| n.running_jobs? }
  end

  # Moves finished and failed jobs from each node's running list to its
  # finished_jobs / failed_jobs list. Sleeps 2 seconds for every job that is
  # still running, which also throttles the polling loop in #run.
  def clear_jobs
    @nodes.each do |node|
      # Iterate over a copy: rm_job mutates running_jobs.
      node.running_jobs.dup.each do |job|
        if job.finished?
          p "Job is over"
          node.rm_job(job)
          node.finished_jobs << job
        elsif job.failed?
          p "Job failed"
          node.rm_job(job)
          node.failed_jobs << job
        else
          sleep 2
        end
      end
    end
  end

  # Drains the queue: whenever a node has a free CPU the next job is started in
  # its own thread. Returns the array of worker threads after all have finished.
  # Raises if jobs are queued but no registered node has a CPU, because the
  # loop could never make progress.
  def run
    Thread.abort_on_exception = false
    if q.length >= 1 && @nodes.none? { |n| n.num_cpus > 0 }
      raise "Scheduler#run: jobs are queued but no node with at least one CPU is registered"
    end

    workers = []
    while q.length >= 1
      if free_nodes
        clear_jobs
        job = q.get_job
        unless job.finished?
          now = Time.now
          # Job and node are chosen here and passed as thread arguments so each
          # worker gets its own values and @nodes is only touched by this thread.
          workers << Thread.new(job, winning_node) { |j, node| submit_job(j, node) }
          puts "Time elapsed :: #{(Time.now-now)} secs"
          sleep 5
        end
      end
      clear_jobs if jobs_are_running?
    end
    workers.each { |t| t.join }
    # Reap the jobs that were still running when the queue became empty.
    clear_jobs
    workers
  end
end
#node1=Node.new('210.98.49.21','saji','xxxx',2)
#node2=Node.new('210.98.49.22','saji','xxxx',2)
#node3=Node.new('210.98.49.111','saji','xxxx',2)
#job1=Job.new("JJA53-00","frc1","20N","80E",1)
#job2=Job.new("JJA53-00","frc1","10N","80E",1)
#job3=Job.new("JJA53-00","frc1","0N","80E",1)
#job4=Job.new("JJA53-00","frc1","10S","80E",1)
#scheduler=Scheduler.new
#scheduler.add_node(node1,node2,node3)
#scheduler.add_job(job1,job2,job3,job4) #,job2,job3,job3,job2,job1,job3)
#scheduler.run
-----nodes.rb
# A compute node: its login details, its CPU count and the jobs assigned to it,
# split into running, finished and failed lists.
class Node
  attr_reader :ip, :user, :pw, :num_cpus, :running_jobs
  attr_accessor :failed_jobs, :finished_jobs
  attr_accessor :num_jobs

  # ip, user, pw:: stored as given and exposed through readers
  # num_cpus:: number of CPUs of the node
  def initialize(ip, user, pw, num_cpus)
    @ip = ip
    @user = user
    @pw = pw
    @num_cpus = num_cpus
    @num_jobs = 0
    @finished_jobs = []
    @running_jobs = []
    @failed_jobs = []
  end

  # Adds job to the running list and returns the new running-job count.
  def add_job(job)
    @running_jobs << job
    @num_jobs += 1
  end

  # Position of job in the running list, or nil if it is not there.
  def job_index(job)
    @running_jobs.index(job)
  end

  # Removes job from the running list and returns the removed job, so that
  # callers can append the result to finished_jobs / failed_jobs.
  # Returns nil and leaves the counter untouched when job is not running here.
  def rm_job(job)
    idx = job_index(job)
    return nil if idx.nil?

    removed = @running_jobs.delete_at(idx)
    @num_jobs -= 1
    removed
  end

  # True when at least one job is in the running list.
  def running_jobs?
    !@running_jobs.empty?
  end
end
--- jobs.rb
require 'fileutils'
# One model run for a single basic state / forcing / location.
#
# All paths are derived from root_dir:
#   <root_dir>/<BS>_<FS>/tmp_<fs><lon><lat>   working directory (removed after the run)
#   <root_dir>/<BS>_<FS>/out/model.<lon><lat>.grd   output file
# The state of a job is inferred from the file system: it counts as running
# while its working directory exists, and as finished once the output file
# exists and the working directory is gone.
class Job
  attr_reader :root_dir

  # bs:: basic state name
  # fs:: forcing name
  # lat, lon:: location labels (e.g. "20N", "80E") used in file names
  # tend:: value substituted into the end time of the generated namelist
  # root_dir:: directory that holds bin/, bs/, the forcing and the results
  def initialize(bs, fs, lat, lon, tend, root_dir)
    @bs = bs
    @fs = fs
    @lat = lat
    @lon = lon
    @tend = tend
    @root_dir = root_dir
  end

  def exp_home
    "#{@root_dir}/#{@bs.upcase}_#{@fs.upcase}"
  end

  def tmp_dir
    "#{exp_home}/tmp_#{@fs}#{@lon}#{@lat}"
  end

  def out_dir
    "#{exp_home}/out"
  end

  def out_fil
    "model.#{@lon}#{@lat}.grd"
  end

  def bs_fil
    "#{@root_dir}/bs/#{@bs}"
  end

  def frc_dir
    "#{@root_dir}/#{@fs}"
  end

  def fs_fil
    "#{frc_dir}/frc.#{@lon}#{@lat}.grd"
  end

  # Creates the experiment, working and output directories.
  def mk_dirs
    FileUtils.mkdir_p exp_home
    FileUtils.mkdir_p tmp_dir
    FileUtils.mkdir_p out_dir
  end

  # Text written to SYSIN and fed to the model executable on stdin.
  # The template is kept flush-left so the generated text is unchanged.
  def namelist
    %Q{
&nmrun run='linear model' &end
&nmtime start=0,1,1,0,0,0, end=0,1,#{@tend},0,0,0 &end
&nmhdif order=4, tefold=0.0833, tunit='DAY' &end
&nmdelt delt=20, tunit='MIN', inistp=2 &end
&nmdamp ddragv=0.5,0.5,0.5,5,30,30,30,30,30,30,30,30,30,30,30,30,30,30,1,0.5,
ddragd=0.5,0.5,0.5,5,30,30,30,30,30,30,30,30,30,30,30,30,30,30,1,0.5,
ddragt=0.5,0.5,0.5,5,30,30,30,30,30,30,30,30,30,30,30,30,30,30,1,0.5,
tunit='DAY' &end
&nminit file='#{bs_fil}' , DTBFR=0., DTAFTR=0., TUNIT='DAY' &end
&nmrstr file='#{tmp_dir}/Restart.amat', tintv=1, tunit='MON', overwt=t &end
&nmvdif vdifv=1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,
vdifd=1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,
vdift=1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3,1.d3, &end
&nmbtdif tdmpc=0. &end
&nmfrc ffrc='#{fs_fil}', oper=f, nfcs=1 &end
&nmsfrc fsfrc='#{fs_fil}', ofrc=t, nsfcs=1, fsend=0,1,10,0,0,0 &end
&nmchck ocheck=f, ockall=f &end
&nmdata item='GRZ', file=' ' &end
&nmhisd tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='PSI', file='psi', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='CHI', file='chi', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='U', file='u', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='V', file='v', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='OMGF', file='w', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='T', file='t', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='Z', file='z', tintv=1, tavrg=1, tunit='DAY' &end
&nmhist item='PS', file='p', tintv=1, tavrg=1, tunit='DAY' &end
}
  end

  # Text written to SETPAR before the gt2gr executable is run.
  # The template is kept flush-left so the generated text is unchanged.
  def setpar
    %Q{
&nmfgt cfs='#{tmp_dir}/psi',
cfc='#{tmp_dir}/chi',
cfu='#{tmp_dir}/u',
cfv='#{tmp_dir}/v',
cfw='#{tmp_dir}/w',
cft='#{tmp_dir}/t',
cfz='#{tmp_dir}/z',
cfp='#{tmp_dir}/p',
cfq='#{tmp_dir}/q',
cftc='#{tmp_dir}/dtc',
cfqc='#{tmp_dir}/dqc',
cftl='#{tmp_dir}/dtl',
cfql='#{tmp_dir}/dql',
cfpr='#{tmp_dir}/pr',
cfo='#{out_dir}/#{out_fil}',
fact=1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
opl=t,
&end
&nmbs cbs0 = '#{bs_fil}'
cbs = '#{bs_fil}.grd'
&end
&nmall owall=t
&end
&nmcls oclassic=t
&end
}
  end

  # Path of the model executable.
  def lbm
    "#{@root_dir}/bin/lbm2.t42ml20ctintgr"
  end

  # Path of the gt2gr executable.
  def gt2gr
    "#{@root_dir}/bin/gt2gr"
  end

  # Runs the model inside tmp_dir, reading SYSIN and appending to SYSOUT.
  # Returns the result of Kernel#system.
  def run_lbm
    Dir.chdir(tmp_dir) do
      system("#{lbm} < SYSIN >> SYSOUT")
    end
  end

  # Runs gt2gr inside tmp_dir. Returns the result of Kernel#system.
  def run_gt2gr
    Dir.chdir(tmp_dir) do
      system(gt2gr)
    end
  end

  # Writes SETPAR into tmp_dir and runs gt2gr.
  def post_process
    File.open("#{tmp_dir}/SETPAR", "w") { |f| f.puts(setpar) }
    run_gt2gr
  end

  # Runs the whole job unless its output file already exists: create the
  # directories, write SYSIN, run the model, post-process, then remove tmp_dir.
  # The exit status of the external programs is not checked; a run that did not
  # produce the output file is later reported by #failed?.
  def run
    return if outfil?

    # The directories must exist before the marker file can be created.
    mk_dirs
    FileUtils.touch("#{tmp_dir}/running")
    File.open("#{tmp_dir}/SYSIN", "w") { |f| f.puts(namelist) }
    run_lbm
    post_process
    FileUtils.remove_dir tmp_dir
  end

  # True while the working directory exists.
  def running?
    File.exist? "#{tmp_dir}"
  end

  # True when the output file exists.
  def outfil?
    File.exist? "#{out_dir}/#{out_fil}"
  end

  # True when the output file exists and the working directory is gone.
  def finished?
    outfil? && !running?
  end

  # True when there is neither an output file nor a working directory.
  def failed?
    !finished? && !running?
  end
end
#job1=Job.new("JJA","frc1",11,1)
#job1.run
#job2=Job.new("JJA53-00","frc1","20N","90E",1.5)
#job2.run
--
Saji N. Hameed
APEC Climate Center
1463 U-dong, Haeundae-gu, +82 51 745 3951
BUSAN 612-020, KOREA saji_at_apcc21.net
Fax: +82-51-745-3999
_______________________________________________
ncl-talk mailing list
List instructions, subscriber options, unsubscribe:
http://mailman.ucar.edu/mailman/listinfo/ncl-talk
Received on Tue Jun 16 2009 - 07:34:27 MDT
This archive was generated by hypermail 2.2.0 : Tue Jun 16 2009 - 10:05:45 MDT