;------------------------------------------------------------------------------
;
; Create an empty Netcdf file with metadata, for a single ERA-Interim variable.
;
; 2012-dec-22	Original version.  By Dave Allured, NOAA/PSD/CIRES/CAB.
;		This version creates a Netcdf-4 classic file with compression
;		  on the main data variable, and unlimited time dimension.
;		Use Netcdf-4 classic for now, because NCL 6.1.0 does not
;		  have full read/write support for "full Netcdf-4".
;		Get original metadata from Netcdf source file.
; 2013-jan-07	Switch to Grib source format.
;		Add actual_range statistics by time step and level.
;		Adjust local var names.
; 2013-jan-25	Use "temp" as var name for temperature.
;
  program_id = "era-create.ncl version 2013-jan-25"
;
; Notes:
;
; This program creates an empty ERA-Interim file for a single
; data variable.  The empty file is populated with the variable
; definition, coordinate variables, and a variety of attributes.
; Data will be added later on the record dimension, with
; era-update.ncl.
;
; Data variables and coordinate variables are renamed from the
; rather cumbersome ERA names to convenient PSD var names.
; See the translation table below.
;
; Usage:
;
; 1.  cd to desired output directory.
;
; 2.  ncl era-create.ncl var=\"var name\"
;
; Specify the PSD var name, not the source var name.
;
;------------------------------------------------------------------------------

load "$LIB_NCL/strip_varatts.ncl"

begin

;----------------------------------------------------
; Run parameters.
;----------------------------------------------------

; Template for source data file name.
;
; This template matches Grib source files for the very first time
; step in the input file sequence.  Coordinates and metadata to
; initialize the new output file are copied from one of these
; matching source files.
;
; There are two source file sequences, the "sc" files and the
; "uv" files.  The two sequences are for two different sets of
; data variables.  We could search two files for the requested
; data var, or we could get the correct file name by table
; lookup.  For simplicity, the table lookup method is used here.
;
; The substring "FF" in this template will be replaced with the
; appropriate file sequence code from the lookup table below.
;
; Note, in this version, specify original grib file names
; *without* the .grib extension.

  in_template = "orig/1979/ei.oper.an.pl.regn128FF.1979010100"

; Template for output file name.
; Note, no path prefix in this version, file is created in the
; current directory.  Note hard coded time range; update
; manually, as needed.

  out_template = "VARNAME.1979-2012.nc"

; Output file attributes.

  dq      = str_get_dq ()
  newline = str_get_nl ()
  nl      = newline

  dataset         = "ERA-Interim"	; additional global attributes
  dataset_version = "2.0"
  conventions     = "CF-1.6"

  references = "http://www.ecmwf.int/research/era/do/get/era-interim;"   + nl \
    + "D. P. Dee et al (2011), The ERA-Interim reanalysis:"              + nl \
    + "  configuration and performance of the data assimilation system," + nl \
    + "  Q.J.R. Meteorol. Soc., Vol. 137: 553-597, DOI: 10.1002/qj.828"

  level_desc = "Multiple Levels"	; additional var attributes
  statistic  = "Individual 4xDaily Values"
  ;; var_desc is extracted from table below, or set to long_name.

; Output file configuration.

  out_format = "NetCDF4Classic"		; see setfileoption docs;
  					; NetCDF4 not supported until NCL 6.1.0
;;  out_format = "classic"	; ***** TEST ONLY

  chunk_sizes = (/ 1, 1, 0, 0 /)	; specify compression parameters
  					; note, zeros replaced below with
                                        ;   actual grid dimensions

  compression_level = 5			; level 5 found to be good tradeoff
  					; note, NCL automatically enables
                                        ;   the shuffle filter

  time_name  = "time"			; define time coordinates
  time_type  = "float"
  time_units = "days since 1960-01-01"
  calendar   = "gregorian"
  time_step  = "4 times daily"

  time_chunk_size = 16384		; Netcdf-4 chunk size for time
  					;   coordinate var
					; large value avoids default tiny
					;   chunks, optimizes access times

  var_min_max = "actual_range"		; name of statistics variable

; Translation table for data variable names, free format, space delimited.
; Only the first three columns are used.  The rest is included for
; documentation only.  Long name and units are from source var attributes.

  var_table = (/ \
                                                                               \
 ;PSD    Source         File                                                   \
 ;Name   Var Name       Code Long Name                   Units                 \
 ;-----  -------------- ---- --------------------------  -------------------   \
 "temp   T_GDS4_ISBL     sc  Temperature                 K                  ", \
 "ciwc   CIWC_GDS4_ISBL  sc  Cloud ice water content     kg kg**-1          ", \
 "clwc   CLWC_GDS4_ISBL  sc  Cloud liquid water content  kg kg**-1          ", \
 "div    D_GDS4_ISBL     sc  Divergence                  s**-1              ", \
 "geopot Z_GDS4_ISBL     sc  Geopotential                m**2 s**-2         ", \
 "omega  W_GDS4_ISBL     sc  Vertical velocity           Pa s**-1           ", \
 "ozone  O3_GDS4_ISBL    sc  Ozone mass mixing ratio     kg kg**-1          ", \
 "pv     PV_GDS4_ISBL    sc  Potential vorticity         K m**2 kg**-1 s**-1", \
 "rhum   R_GDS4_ISBL     sc  Relative humidity           %                  ", \
 "shum   Q_GDS4_ISBL     sc  Specific humidity           kg kg**-1          ", \
 "tcdc   CC_GDS4_ISBL    sc  Cloud cover                 (0 - 1)            ", \
 "uwnd   U_GDS4_ISBL     uv  U velocity                  m s**-1            ", \
 "vort   VO_GDS4_ISBL    sc  Vorticity (relative)        s**-1              ", \
 "vwnd   V_GDS4_ISBL     uv  V velocity                  m s**-1            " /)

  var_desc_table = (/ 		; *** this table is fixed width columns,      \
                                ; *** and order must be same as first table   \
                                                                              \
    ;PSD Name  Long Name                   Dictionary var_desc (if different) \
    ;--------  --------------------------  ---------------------------------- \
    "temp      Temperature                                                 ", \
    "ciwc      Cloud ice water content                                     ", \
    "clwc      Cloud liquid water content                                  ", \
    "div       Divergence                                                  ", \
    "geopot    Geopotential                                                ", \
    "omega     Vertical velocity           Omega (dp/dt)                   ", \
    "ozone     Ozone mass mixing ratio                                     ", \
    "pv        Potential vorticity                                         ", \
    "rhum      Relative humidity                                           ", \
    "shum      Specific humidity                                           ", \
    "tcdc      Cloud cover                 Total Cloud Cover               ", \
    "uwnd      U velocity                  u-wind                          ", \
    "vort      Vorticity (relative)        Relative Vorticity              ", \
    "vwnd      V velocity                  v-wind                          " /)

; Get requested var name from the command line.

  if (.not. isdefined ("var")) then
    print ("")
    print ("*** Missing command argument.  Usage:")
    print ("*** ncl era-create.ncl var=\" + dq + "varname\" + dq)
    status_exit (1)
  end if

; Look up the PSD var name in the var table.

  psd_vars = str_get_field (var_table, 1, " ")	; parse table, space delimited
  vi = ind (var .eq. psd_vars)

  if (ismissing (vi)) then
    print ("*** Abort, var name is unknown: " + dq + var + dq)
    print ("*** Please select from one of the PSD var names in the first" \
      + " column:")
    print (var_table+"")
    status_exit (1)
  end if

  era_var   = str_get_field (var_table(vi), 2, " ")
  file_code = str_get_field (var_table(vi), 3, " ")

; Get the PSD var_desc from the table.
; If blank, will use original long_name later.

  var_desc = str_get_cols (var_desc_table(vi), 38, 999)
  var_desc = str_strip (var_desc)		; remove trailing spaces

; Construct file name for the corresponding metadata input file.

  infile = str_sub_str (in_template, "FF", file_code)
  infile2 = infile + ".grib"			; helper extension for grib

; Construct the target output file name.

  outfile = str_sub_str (out_template, "VARNAME", var)

;----------------------------------------------------
; Read metadata source file.
;----------------------------------------------------

  print ("")
  print ("Input file for metadata = " + infile)

  if (.not. isfilepresent (infile)) then
     print ("*** Abort: File not found.")
     status_exit (1)
  end if

  in = addfile (infile2, "r")

; Find the requested ERA data var.

  print ("Requested output var name  = " + var)
  print ("Corresponding ERA var name = " + era_var)

  if (.not. isfilevar (in, era_var)) then
    print ("*** Abort, requested ERA variable is not found in the input file.")
    status_exit (1)
  end if

; Read attributes and coordinate vars on this data var.
; Note, ERA source files are single time step, and have no time dimension!

; Split read for coordinates, for speed.
; Assume fixed dimensions (level, lat, lon).

  sample1 = in->$era_var$(0:0,:,:)	; read one grid for single level
  sample2 = in->$era_var$(:,0,0)	; read single grid point, all levels

  var_type = typeof (sample1)

  if (var_desc .eq. "") then		; use long_name as var_desc
     var_desc = sample1@long_name	; unless value specified in table
  end if

  print ("Long name                  = " + sample1@long_name)
  print ("Var_desc                   = " + var_desc)
  print ("Data units                 = " + sample1@units)
  print ("Data type                  = " + var_type)

; Isolate the coordinate variables.

  ydim = sample1!1	; lat		; get file dim names
  xdim = sample1!2	; lon
  zdim = sample2!0	; level

  lats   = sample1&$ydim$		; isolate the coordinate variables
  lons   = sample1&$xdim$
  levels = sample2&$zdim$

  lats!0   = "lat"			; change to PSD standard dimension names
  lons!0   = "lon"
  levels!0 = "level"

;----------------------------------------------------
; Set up output variables and metadata.
;----------------------------------------------------

; Remove confusing, unwanted source attributes.

  unwanted = "Di La1 La2 Lo1 Lo2 N"
  strip_VarAtts (lats, str_split (unwanted, " "))
  strip_VarAtts (lons, str_split (unwanted, " "))

  lats&lat     = lats			; force NCL to understand coordinate
  lons&lon     = lons			; variables when writing to file
  levels&level = levels

; Show input dimensions.

  nlevs = dimsizes (levels)
  nlats = dimsizes (lats)
  nlons = dimsizes (lons)

  print ("")
  print ("Number of levels     = " + nlevs)
  print ("Number of latitudes  = " + nlats)
  print ("Number of longitudes = " + nlons)

; Insert actual dimensions for chunking sizes, for dims indicated by zeros.

  insert_chunk_sizes = (/ 1, nlevs, nlats, nlons /)
  chunk_sizes = where (chunk_sizes .eq. 0, insert_chunk_sizes, chunk_sizes)

  print ("Chunk sizes          = " + chunk_sizes(0) + " " + chunk_sizes(1) \
    + " " + chunk_sizes(2) + " " + chunk_sizes(3) + " (time level lat lon)")

; Data var attributes, starting with inherited.

  unwanted = "initial_time forecast_time_units forecast_time" \
           + " level_indicator center"			; sample1 contains
  strip_VarAtts (sample1, str_split (unwanted, " "))	; var attribs to keep

  sample1@missing_value      = sample1@_FillValue
  sample1@era_var_name       = era_var
  sample1@file_sequence_code = file_code

; Attributes for data dictionary.

  sample1@dataset    = dataset
  sample1@var_desc   = var_desc
  sample1@level_desc = level_desc
  sample1@statistic  = statistic

; Add actual_range attributes.

  lats@actual_range    = (/ lats(0),   lats(nlats-1)   /)
  lons@actual_range    = (/ lons(0),   lons(nlons-1)   /)
  levels@actual_range  = (/ levels(0), levels(nlevs-1) /)

; Actual_ranges for time coordinates and data are added by era-update.ncl.

;----------------------------------------------------
; Write the new output file.
;----------------------------------------------------

  print ("")
  print ("Create output file: " + outfile)

  if (isfilepresent (outfile)) then	; *** delete existing file if needed
    system ("rm " + outfile)		; *** FOR TESTING ONLY
  end if

  setfileoption ("netcdf", "Format", out_format)   ; select netcdf-4 format

  out = addfile (outfile, "c")			   ; create netcdf file

; Write global attributes.

  time_stamp = systemfunc ("date")

  out@title   = dataset + " " + sample1@long_name + ", " + time_step
  out@history = time_stamp + ":" + newline \
    + "Initial file made by " + program_id
  out@dataset     = dataset
  out@version     = dataset_version
  out@references  = references
  out@center      = sample2@center
  out@metadata_source_file = infile
  out@Conventions = conventions

; Write fixed coordinate variables, in data subscript order.
; Note, dimensions are created automatically by NCL.

  out->level = levels
  out->lat   = lats
  out->lon   = lons

; Set up the unlimited time dimension to allow future appends.

  dim_name   = time_name		; dim and var must share the same name
  dummy_size = -1
  unlimited  = True
  filedimdef (out, dim_name, dummy_size, unlimited)

; Define the empty time coordinate variable.

  filevardef (out, time_name, time_type, dim_name)	; define 1-D variable
  filevarchunkdef (out, time_name, time_chunk_size)	; avoid tiny time chunks

  out->time@long_name = "time"
  out->time@units     = time_units
  out->time@calendar  = calendar
  out->time@time_step = time_step

; Define the empty data variable.

  dim_names_4d = (/ "time", "level", "lat", "lon" /)

  filevardef (out, var, var_type, dim_names_4d)		; define 4-D variable
  filevarchunkdef (out, var, chunk_sizes)		; set chunk sizes
  filevarcompressleveldef (out, var, compression_level)	; set compression level
  filevarattdef (out, var, sample1)			; write var attributes

; Define the empty statistics variable.

  min_max_dim = "min_max"			; extra dimension for min & max
  dim_size    = 2				; subscripts 1 = min, 2 = max
  unlimited   = True
  filedimdef (out, min_max_dim, dim_size, .not. unlimited)

  dim_names_3d   = (/ "time", "level", min_max_dim /)
  chunk_sizes_3d = (/ 512, nlevs, dim_size /)

  filevardef (out, var_min_max, var_type, dim_names_3d)	; define statistics var
  filevarchunkdef (out, var_min_max, chunk_sizes_3d)	; set chunk sizes
  
  atts               = new (1, var_type)
  atts@_FillValue    = sample1@_FillValue
  atts@missing_value = sample1@missing_value
  atts@units         = sample1@units
  atts@parent_var    = var
  atts@long_name     = "Actual data range at each time and level"

  filevarattdef (out, var_min_max, atts)	; write statistics attributes

  print ("Done.")
end
exit