A sample function and test driver is attached.
Quite possibly, others could write more efficient code.
Wall Clock Timings:
8000 elements: less than 1 sec
40000 elements: 6 seconds
80000 elements: 25 sec
Good luck
Vladyslav Lyubartsev wrote:
>
> Hello,
>
> We have two huge arrays with the same dimension:
>
> Days = (/19950308,19950314,19950314,...,20081228,20081231,20081231/)
>
> Data = (/ 12.1, 22.5, 32.0, 12.8, 16.0, 32.1/)
>
> There can be several data values for the same day.
>
> Days are irregular. Sure we can sort them, however there will be gaps 
> and duplicates in this array.
>
> Is it possible to carry out the following tasks efficiently, no loops, 
> using only built-in NCL functions:
>
> 1) Construct the list of unique days, no duplicates
>
> 2) Calculate amount of duplicates for each unique day
>
> 3) Calculate the data average for each unique day
>
> I see no such solution, only loops.
>
> Am I right?
>
> Thanks,
>
> Slava
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> ncl-talk mailing list
> List instructions, subscriber options, unsubscribe:
> http://mailman.ucar.edu/mailman/listinfo/ncl-talk
>   
-- ====================================================== Dennis J. Shea tel: 303-497-1361 | P.O. Box 3000 fax: 303-497-1333 | Climate Analysis Section | Climate & Global Dynamics Div. | National Center for Atmospheric Research | Boulder, CO 80307 | USA email: shea 'at' ucar.edu | ======================================================
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl"
function uniqueDays(Days[*]:integer, Data[*]:numeric)
; There can be several data values for the same day.
; Days are irregular. Sure we can sort them, however there will 
; be gaps and duplicates in this array.
; 
; Is it possible to carry out the following tasks efficiently, 
; no loops, using only built-in NCL functions:
; 
; 1)  Construct the list of unique days, no duplicates
; 2)  Calculate amount of duplicates for each unique day
; 3)  Calculate the data average for each unique day
local nDays, inum, uFlg, x, n, k, ii
begin
  nDays = dimsizes( Days )
  inum  = new(nDays, integer)
  uFlg  = new(nDays, logical)
  uFlg  = True
  if (isatt(Data, "_FillValue")) then
      x = new( (/nDays,3/), "float", getFillValue(Data))
  else
      x = new( (/nDays,3/), "float", 1e20)
  end if
      
  do n=0,nDays-1
     inum(n) = num(Days(n).eq.Days)
  end do
  if (all(inum.eq.1)) then        
      x(:,0) = Days               ; unique days  
      x(:,1) = 1                  ; number of duplicates
      x(:,2) = Data               ; average [only one value]
      k      = nDays-1            ; max subscript
  else
      k = -1
      do n=0,nDays-1
         if (inum(n).eq.1) then
             k = k+1
             x(k,0) = Days(n)        
             x(k,1) = 1          
             x(k,2) = Data(n)      
             uFlg(n)= False
         else
             if (uFlg(n)) then
                 k = k+1
                 x(k,0) = Days(n)
                 x(k,1) = inum(n)
                 ii     = ind(Days(n).eq.Days)
                 x(k,2) = avg(Data(ii))
                 uFlg(ii) = False     ; turn off all duplicate dates
                 delete(ii)           ; may change next iteration
             end if
         end if
      end do
  end if
  if (isatt(Data,"long_name")) then
      x_at_long_name = Data_at_long_name
  end if
  x_at_description = "(0) Date; (1) # Duplicates; (2) Avg"
  x!0    = "time"
  x!1    = "info"
  
  return( x(0:k,:) )
end
begin
;=======================================================
;                  MAIN
;=======================================================
  Days  = (/19950308,19950314,19950314,20081228,20081231,20081231, 20081231,20081231/)
  Data  = (/    12.1,    22.5,    32.0,    12.8,    16.0,    32.1,  -17.25, 33.9 /)
  nDays = dimsizes(Days)
  
  cr = inttochar(10)
  print(cr+"=======> "+nDays+" <======"+cr) 
  dayStat  =  uniqueDays(Days, Data)          
  printVarSummary(dayStat)
  uDays = floattoint( dayStat(:,0 ) )
  uDups = floattoint( dayStat(:,1 ) )
  uAvg  = dayStat(:,2 )
  print(uDays+"  "+uDups+"  "+uAvg )
; replicate 1000 time
  print(cr+"=======> "+nDays*1000+" <======"+cr) 
  DAYS = ndtooned( conform_dims( (/1000,nDays/), Days, 1))
  DATA = ndtooned( conform_dims( (/1000,nDays/), Data, 1))
  
  wcStrt   = systemfunc("date")
  DAYStat  = uniqueDays(DAYS, DATA)
  wallClockElapseTime(wcStrt, "size="+(nDays*1000), 0) 
  printVarSummary(DAYStat)
  uDAYS = floattoint( DAYStat(:,0 ) )
  uDUPS = floattoint( DAYStat(:,1 ) )
  uAVG  = DAYStat(:,2 )
  print(uDAYS+"  "+uDUPS+"  "+uAVG )
  delete(DAYS)
  delete(DATA)
; replicate 5000 time
  print(cr+"=======> "+nDays*5000+" <======"+cr) 
  DAYS = ndtooned( conform_dims( (/5000,nDays/), Days, 1))
  DATA = ndtooned( conform_dims( (/5000,nDays/), Data, 1))
  
  wcStrt   = systemfunc("date")
  DAYStat  = uniqueDays(DAYS, DATA)
  wallClockElapseTime(wcStrt, "size="+(nDays*5000), 0) 
  printVarSummary(DAYStat)
  uDAYS = floattoint( DAYStat(:,0 ) )
  uDUPS = floattoint( DAYStat(:,1 ) )
  uAVG  = DAYStat(:,2 )
  print(uDAYS+"  "+uDUPS+"  "+uAVG )
  delete(DAYS)
  delete(DATA)
; replicate 10000 time
  print(cr+"=======> "+nDays*10000+" <======"+cr) 
  DAYS = ndtooned( conform_dims( (/10000,nDays/), Days, 1))
  DATA = ndtooned( conform_dims( (/10000,nDays/), Data, 1))
  
  wcStrt   = systemfunc("date")
  DAYStat  = uniqueDays(DAYS, DATA)
  wallClockElapseTime(wcStrt, "size="+(nDays*10000), 0) 
  printVarSummary(DAYStat)
  uDAYS = floattoint( DAYStat(:,0 ) )
  uDUPS = floattoint( DAYStat(:,1 ) )
  uAVG  = DAYStat(:,2 )
  print(uDAYS+"  "+uDUPS+"  "+uAVG )
end
_______________________________________________
ncl-talk mailing list
List instructions, subscriber options, unsubscribe:
http://mailman.ucar.edu/mailman/listinfo/ncl-talk
Received on Tue Mar 17 2009 - 10:51:11 MDT
This archive was generated by hypermail 2.2.0 : Wed Mar 18 2009 - 14:50:21 MDT