;----------------------------------------------------------------------
; This example reads an ASCII file that is formatted a specific way, and
; writes out the results to a netCDF file.
;
; The first line in the ASCII file must be a header, with each field
; separated by a single character delimiter (like a ","). The rest of
; the file must be such that each row contains all fields, each
; separated by the designated delimiter.
;
; The fields can be integer, float, double, character, or string.
; String fields cannot be written to a netCDF file. They have to
; be read in as character arrays and written out that way.
;
; To modify this script for your own data file, first search for the
; lines:
;
;   ;============================================================
;   ; Main code
;   ;============================================================
;
; The lines you need to modify follow shortly:
;
;   filename    = "asc5.txt"            ; ASCII file to read.
;   nfields     = 6                     ; # of fields
;   delimiter   = ","                   ; field delimiter
;   var_types   = new(nfields,string)
;   var_msg     = new(nfields,string)
;   var_strlens = new(nfields,integer)  ; var to hold string lengths,
;                                       ; just in case.
;     .
;     .
;     .
;   var_msg        = ""           ; Default to no missing
;   var_msg(3)     = "-999"       ; Corresponds to field #4
;   var_types      = "integer"    ; Default to integer
;   var_types(1:2) = "float"      ; Second and third fields
;   var_types(4)   = "character"  ; Corresponds to field #5
;
; Change "var_types" to whatever the types of your fields are, and
; "var_msg" to what the missing value should be (an empty string
; indicates no missing value). The above code is defaulting all
; variable types to "integer", and then changing the 2nd and 3rd fields
; to type "float" and the fifth field to type "character" (which in this
; case is being used as a character array). The only field that will
; contain a missing value is the fourth field.
;
; The allowable variable types are "integer", "float", "double",
; "string", or "character". Note that if you read in a variable as a
; string, it won't get written to the netCDF file because only character
; arrays can be written to a netCDF file.
;
;----------------------------------------------------------------------

;----------------------------------------------------------------------
; This function returns the index locations of the given delimiter
; in a row or several rows of strings.
;
;   strings   - a single string or a 1D array of strings (one per row)
;   nfields   - number of fields expected on every row
;   delimiter - the field delimiter; must be exactly one character
;
; Returns an nrows x (nfields-1) integer array of delimiter positions.
; The script exits if the delimiter is not a single character, or if
; any row does not contain exactly nfields-1 delimiters.
;----------------------------------------------------------------------
function delim_indices(strings,nfields,delimiter)
local nrows, cstrings, cdelim, nc, rank, ndelims, cindices, i, ii
begin
  nrows = dimsizes(strings)
;
; Handle special case if we only have one string. Make sure it
; is put into a 2D array.
;
  if(nrows.eq.1) then
    cstrings = new((/1,strlen(strings)+1/),character)
  end if

  cstrings = stringtochar(strings)     ; Convert to characters.
  cdelim   = stringtochar(delimiter)   ; Convert delimiter to character.
;
; Some error checking here. Make sure delimiter is one character.
; (stringtochar appends a trailing null, hence the expected size of 2.)
;
  nc   = dimsizes(cdelim)
  rank = dimsizes(nc)
  if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
    print("delim_indices: fatal: the delimiter you've selected")
    print("must be a single character. Can't continue.")
    exit
  end if
;
; Create array to hold indices of delimiter locations, and then loop
; through each row and find all the delimiters. Make sure each row has
; the correct number of delimiters.
;
  ndelims  = nfields-1
  cindices = new((/nrows,ndelims/),integer)
  do i = 0, nrows-1
    ii = ind(cstrings(i,:).eq.cdelim(0))
;
; Make sure there were delimiters on this row. If not, we just quit.
; This could probably be modified to do this more gracefully.
;
    if(any(ismissing(ii))) then
      print("delim_indices: fatal: I didn't find any delimiters")
      print("('" + delimiter + "') on row " + i + ". Can't continue.")
      exit
    end if
    if(dimsizes(ii).ne.ndelims) then
      print("delim_indices: fatal: I expected to find " + ndelims)
      print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
      print("Can't continue.")
      exit
    end if

    cindices(i,:) = ii
    delete(ii)            ; For next time through loop
  end do

  return(cindices)
end

;----------------------------------------------------------------------
; This function removes whitespace (currently spaces and tabs) at
; the beginning and/or end of a character string, and replaces other
; whitespaces with underscores.
;
; 3/12/2009: Jonathan Vigh (JLV in the comments below) added some fixes
; to this function to get it working properly for whitespace at
; the end of a string.
;
; opt:
;   = 0 --> just return the array
;   = 1 --> remove whitespace at the beginning and end of array and
;           convert other whitespaces to underscores ('_')
;   = 2 --> remove whitespace at the beginning only
;   = 3 --> remove whitespace at the end only (a trailing null
;           character is appended to the result)
;
; Any other value of opt prints a warning and returns the array
; unchanged. If the array holds nothing but spaces, tabs and nulls,
; it is also returned unchanged.
;----------------------------------------------------------------------
function deal_with_wspace(c[*]:character,opt:integer)
local space, tab, nullChar, ii, jj, ibeg1, iend1, ctmp, tmp
begin
; Do nothing.
  if(opt.eq.0) then
    return(c)
  end if

  space    = inttochar(32)
  tab      = inttochar(9)
  nullChar = inttochar(0)       ; JLV - added this definition

;
; Get indices of the characters that are NOT whitespace.
; JLV - the null character is excluded as well so that option 3
; works correctly (the trailing null is not mistaken for content).
;
  ii = ind(c.ne.space.and.c.ne.tab.and.c.ne.nullChar)

; Nothing but whitespace/null characters: there is nothing to trim to.
  if(any(ismissing(ii))) then
    return(c)
  end if

; Remove whitespace from beginning
  if(opt.eq.1.or.opt.eq.2) then
    ibeg1 = min(ii)             ; First non whitespace character.
  else
    ibeg1 = 0
  end if

; Remove whitespace from end
  if(opt.eq.1.or.opt.eq.3) then
    iend1 = max(ii)             ; Last non whitespace character.
  else
    iend1 = dimsizes(c)-1
  end if

; Convert rest of whitespace to underscores.
  if(opt.eq.1) then
    ctmp = c(ibeg1:iend1)
    jj   = ind(ctmp.eq.space.or.ctmp.eq.tab)
    if(.not.any(ismissing(jj))) then
      ctmp(jj) = inttochar(95)  ; 95 is an underscore
    end if
    return(ctmp)
  end if

  if(opt.eq.2) then
    return(c(ibeg1:iend1))
  end if

; JLV - ensure that a trailing nullChar is included for opt=3. The
; result is one element longer than the trimmed text and pre-filled
; with nulls; the destination is indexed from 0 so the copy does not
; depend on the value of ibeg1.
  if(opt.eq.3) then
    tmp = new(dimsizes(c(ibeg1:iend1))+1,character,nullChar)
    tmp(0:iend1-ibeg1) = c(ibeg1:iend1)
    return(tmp)
  end if

; Unknown option: don't fall off the end of the function without a
; return value.
  print("deal_with_wspace: warning: unrecognized opt (" + opt + ").")
  print("Returning the character array unchanged.")
  return(c)
end

;----------------------------------------------------------------------
; This function reads in a particular field from a string array,
; given the field number to read (fields start at #1 and go to #nfield),
; and the indices of the delimiters.
;
; It returns either an integer, float, double, character, or a string,
; depending on the input flag "return_type".
;
; "return_msg" is the missing value for the field, given as a string;
; an empty string means no missing value was declared. A numeric field
; that is empty (or a single space) is set to missing. If no missing
; value was declared, the default _FillValue for the type is used.
;
; Last input variable, wspace_opt, is only for strings or character arrays:
;   = 0 --> do nothing
;   = 1 --> whitespace at the beginning and end of string are trimmed
;   = 2 --> only whitespace at the beginning are removed
;   = 3 --> only whitespace at the end are removed
;
; This is needed for the variable names in the header that can't start
; or end with a space.
;----------------------------------------------------------------------
function read_field(strings,ifield,indices,return_type,return_msg, \
                    wspace_opt:integer)
local nrows, cstrings, nf, last_col, is_numeric, return_array, \
      tmp_return_array, max_len, i, ibeg, iend, is_empty, cc, \
      tmpc, ntmpc, dflt
begin
  nrows = dimsizes(strings)
;
; Handle special case if we only have one string. Make sure it
; is put into a 2D array.
;
  if(nrows.eq.1) then
    cstrings = new((/1,strlen(strings)+1/),character)
  end if

  cstrings = stringtochar(strings)
  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
  last_col = dimsizes(cstrings(0,:))-1    ; last column of the char array
;
; Error checking. Make sure user has entered a valid field.
;
  if(ifield.le.0.or.ifield.gt.nf) then
    print("read_field: fatal: you've selected a field that is")
    print("out-of-range of the number of fields that you have (" + nf + ").")
    exit
  end if

  is_numeric = any(return_type.eq.(/"integer","float","double"/))
;
; Set up array to return. For string, int, float, or double arrays,
; we don't have to do anything special. For character arrays,
; however, we do.
;
  if(return_type.ne."character") then
    return_array = new(nrows,return_type,"No_FillValue")
;
; Attach the user's missing value once, up front, so that it is already
; in place if the very first row turns out to be an empty field.
;
    if(is_numeric.and.return_msg.ne."") then
      if(return_type.eq."integer") then
        return_array@_FillValue = stringtointeger(return_msg)
      end if
      if(return_type.eq."float") then
        return_array@_FillValue = stringtofloat(return_msg)
      end if
      if(return_type.eq."double") then
        return_array@_FillValue = stringtodouble(return_msg)
      end if
    end if
  else
;
; We don't know what the biggest character array is at this point, so
; make it bigger than necessary, and then resize later as necessary.
;
    tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
    max_len          = 0     ; Use to keep track of max lengths of strings.
  end if

  do i = 0,nrows-1
;
; The field starts at the beginning of the row (first field) or just
; after the preceding delimiter.
;
    if(ifield.eq.1) then
      ibeg = 0
    else
      ibeg = indices(i,ifield-2)+1
    end if
;
; The field ends at the end of the row (last field) or just before
; the following delimiter.
;
    if(ifield.eq.nf) then
      iend = last_col
    else
      iend = indices(i,ifield-1)-1
    end if
;
; An empty field (two adjacent delimiters, or a row that starts with a
; delimiter) gives iend < ibeg. NCL would treat ibeg:iend as a reversed
; range, or reject a negative index, so never subscript in that case.
;
    is_empty = iend.lt.ibeg
;
; Here's the code that pulls off the correct string, and converts it
; to a different type if desired.
;
    if(is_empty) then
      cc = ""
    else
      cc = chartostring(cstrings(i,ibeg:iend))
    end if

    if(is_numeric.and.(cc.eq."".or.cc.eq." ")) then
;
; Blank numeric field --> missing. If the user didn't declare a missing
; value, adopt the default one for this type so there is a _FillValue
; to assign.
;
      if(.not.isatt(return_array,"_FillValue")) then
        dflt = new(1,return_type)
        return_array@_FillValue = dflt@_FillValue
        delete(dflt)
      end if
      return_array(i) = return_array@_FillValue
    else
      if(return_type.eq."integer") then
        return_array(i) = stringtointeger(cc)
      end if
      if(return_type.eq."float") then
        return_array(i) = stringtofloat(cc)
      end if
      if(return_type.eq."double") then
        return_array(i) = stringtodouble(cc)
      end if
      if(return_type.eq."string") then
        if(is_empty) then
          return_array(i) = ""
        else
          return_array(i) = chartostring(deal_with_wspace(cstrings(i,ibeg:iend), \
                                                          wspace_opt))
        end if
      end if
      if(return_type.eq."character".and.(.not.is_empty)) then
; Do we need to deal with whitespace?
        if(wspace_opt.gt.0) then
          tmpc  = deal_with_wspace(cstrings(i,ibeg:iend),wspace_opt)
          ntmpc = dimsizes(tmpc)            ; length of new string
          tmp_return_array(i,0:ntmpc-1) = tmpc
          if(ntmpc.gt.max_len) then
            max_len = ntmpc
          end if
          delete(tmpc)
        else
          if( (iend-ibeg+1) .gt. max_len) then
            max_len = iend-ibeg+1
          end if
          tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
        end if
      end if
    end if
    delete(cc)
  end do

; Shrink the character array down to the longest field actually found.
  if(return_type.eq."character") then
    return_array = new((/nrows,max_len/),"character")
    return_array = tmp_return_array(:,0:max_len-1)
    if(return_msg.ne."") then
      return_array@_FillValue = stringtochar(return_msg)
    else
      delete(return_array@_FillValue)
    end if
  end if

  return(return_array)
end

;----------------------------------------------------------------------
; This function scans a particular field of a string array only to get
; the maximum length (in characters) of that field over all rows.
;
;   strings - a single string or a 1D array of strings (one per row)
;   ifield  - field number to scan (fields start at #1)
;   indices - delimiter locations, as returned by delim_indices
;----------------------------------------------------------------------
function get_maxlen(strings,ifield,indices)
local nrows, cstrings, nf, last_col, max_len, i, ibeg, iend
begin
  nrows = dimsizes(strings)
;
; Handle special case if we only have one string. Make sure it
; is put into a 2D array.
;
  if(nrows.eq.1) then
    cstrings = new((/1,strlen(strings)+1/),character)
  end if

  cstrings = stringtochar(strings)
  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
  last_col = dimsizes(cstrings(0,:))-1    ; last column of the char array
;
; Error checking. Make sure user has entered a valid field.
;
  if(ifield.le.0.or.ifield.gt.nf) then
    print("get_maxlen: fatal: you've selected a field that is")
    print("out-of-range of the number of fields that you have (" + nf + ").")
    exit
  end if

  max_len = 0     ; Use to keep track of max lengths of strings.

  do i = 0,nrows-1
;
; The field starts at the beginning of the row (first field) or just
; after the preceding delimiter.
;
    if(ifield.eq.1) then
      ibeg = 0
    else
      ibeg = indices(i,ifield-2)+1
    end if
;
; The field ends at the end of the row (last field) or just before
; the following delimiter.
;
    if(ifield.eq.nf) then
      iend = last_col
    else
      iend = indices(i,ifield-1)-1
    end if

    if( (iend-ibeg+1) .gt. max_len) then
      max_len = iend-ibeg+1
    end if
  end do

  return(max_len)
end

;============================================================
; Main code
;============================================================
begin
;
; Set up defaults here. We are hard-coding the field types here.
; You can set up this script to try to determine the field types
; automatically, but this is a bit tedious. Maybe later.
;
  filename  = "asc5.txt"                ; ASCII file to read.
  cdf_file  = filename + ".nc"          ; netCDF file to write.
  nfields   = 6                         ; # of fields
  delimiter = ","                       ; field delimiter
;
; In "asc5.txt", field #5 is type string, fields #2 and #3
; are float, and the rest are integers.
;
  var_types   = new(nfields,string)
  var_msg     = new(nfields,string)     ; var to hold missing values
  var_strlens = new(nfields,integer)    ; var to hold strlens, just in case.

  var_msg        = ""                   ; Default to no missing
  var_msg(3)     = "-999"               ; Corresponds to field #4 (integer)
  var_types      = "integer"            ; Default to int.
  var_types(4)   = "character"          ; Corresponds to field #5.
  var_types(1:2) = "float"

  if(isfilepresent(cdf_file)) then
    print("Warning: '" + cdf_file + "' exists.")
    print("Remove before running this script.")
    exit
;   print("Will remove it.")
;   system("/bin/rm " + cdf_file)
  end if
;
; Read in data as strings. This will create a string array that has the
; same number of strings as there are rows in the file. We will then need
; to parse each string later.
;
  read_data = asciiread(filename,-1,"string")
;
; We need a header line plus at least one row of data; otherwise the
; subscript read_data(1:) below would be out of range.
;
  if(dimsizes(read_data).lt.2) then
    print("fatal: '" + filename + "' must contain a header line and")
    print("at least one row of data. Can't continue.")
    exit
  end if

  header = read_data(0)        ; Header. Use for variable names.
  data   = read_data(1:)       ; Get rid of first line which is a header.
  nrows  = dimsizes(data)      ; Number of rows.
;
; Read in locations of delimiters in each string row.
;
  hindices = delim_indices(header,nfields,delimiter)   ; header row
  dindices = delim_indices(data,nfields,delimiter)     ; rest of file
;
; Read in the field names which will become variable names on
; the netCDF file.
;
  var_names = new(nfields,string)

  do i=0,nfields-1
    var_names(i) = read_field(header,i+1,hindices,"string","",1)
  end do
;
; Write out this netCDF file efficiently so it will be faster.
; Try to predefine everything before you write to it.
;
  f = addfile(cdf_file,"c")
  setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
;
; Write global attributes to file. It's okay to do this before
; predefining the file's variables. We are still in "define" mode.
;
  fAtt               = True
  fAtt@description   = "Data read in from " + filename + " ASCII file."
  fAtt@creation_date = systemfunc ("date")
  fileattdef( f, fAtt )
;
; Write dimension names to file. If there are no character variables,
; then there's only one dimension name ("nvalues").
;
; Otherwise, we need to write a dimension name for every character
; variable, which will indicate the maximum string length for that
; variable.
;
  indc = ind(var_types.eq."character")
  if(.not.any(ismissing(indc))) then
;
; We have to treat the character arrays special here. We need to
; know their sizes so we can write the maximum size of each char
; array to the netCDF file as a dimension name. This means we
; need to read in the character variables once to get the string
; lengths, then we'll read them again later to get the actual values.
;
    do i=0,dimsizes(indc)-1
      var_strlens(indc(i)) = get_maxlen(data,indc(i)+1,dindices)
    end do

    ndims    = dimsizes(indc) + 1
    dimNames = new(ndims,string)
    dimSizes = new(ndims,integer)
    dimUnlim = new(ndims,logical)

    dimUnlim            = False
    dimUnlim(0)         = True
    dimNames(0)         = "nvalues"
    dimNames(1:ndims-1) = var_names(indc) + "_StrLen"
    dimSizes(0)         = -1
    dimSizes(1:ndims-1) = var_strlens(indc)
    filedimdef(f,dimNames,dimSizes,dimUnlim)
  else
;
; No character variables, so just write the one dimension name.
;
    filedimdef(f,"nvalues",-1,True)
  end if
;
; Define each variable on the file.
;
; Don't deal with variables that are of type string.
;
  do i=0,nfields-1
    if(var_types(i).ne."string") then
      if(var_types(i).ne."character") then
        filevardef(f, var_names(i), var_types(i), "nvalues")
      else
        filevardef(f, var_names(i), var_types(i), \
                   (/"nvalues",var_names(i)+"_StrLen"/))
      end if
    end if
  end do
;
; Loop through each field, read the values for that field, print
; information about the variable, and then write it to the netCDF
; file.
;
  do i=0,nfields-1
    ifield = i+1           ; Fields start at #1, not #0.
;
; Note: you can't write strings to a netCDF file, so these have
; to be written out as character arrays.
;
    tmp_data = read_field(data,ifield,dindices,var_types(i),var_msg(i),0)
;
; Print some info about the variable.
;
    print("")
    print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
    print("Type is " + var_types(i) + ".")
    if(var_types(i).ne."string".and.var_types(i).ne."character") then
      print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
    end if

    if(any(ismissing(tmp_data))) then
      print("This variable does contain missing values.")
    else
      print("This variable doesn't contain missing values.")
    end if
;
; String variables were deliberately not defined on the file above,
; so they must not be written either.
;
    if(var_types(i).eq."string") then
      print("Skipping: string variables can't be written to the netCDF file.")
    else
      f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
    end if

    delete(tmp_data)                     ; Delete for next round.
  end do
end