using DataFrames

# Ids 1..332, matching the default file range used by the functions below.
ID = [1:332]
# Element count of ID derived from byte sizes; has no effect beyond being
# displayed when the script is evaluated interactively.
sizeof(ID)/sizeof(ID[1])

# corr(directory, threshold = 0; ids = 1:332)
#
# For every id in `ids`, read "<directory>/<id zero-padded to 3 digits>.csv"
# with DataFrames' `readtable` and collect the rows in which neither column 2
# nor column 3 is NA. Files with more than `threshold` such rows contribute
# cor(column 2, column 3) to the result.
#
# Returns a Vector{Float64} with one correlation per qualifying file, in the
# order of `ids`.
function corr(directory, threshold = 0; ids = 1:332)
    pcorr = Array(Float64, 0)
    for i in ids
        df = readtable(@sprintf("%s/%03d.csv", directory, i))
        v2 = Array(Float64, 0)
        v3 = Array(Float64, 0)
        for j = 1:size(df, 1)
            # && short-circuits: column 3 is only inspected when column 2 is present.
            if !isna(df[j, 2]) && !isna(df[j, 3])
                push!(v2, df[j, 2])
                push!(v3, df[j, 3])
            end
        end
        length(v2) > threshold && push!(pcorr, cor(v2, v3))
    end
    return pcorr
end

pcorr = corr("specdata")
# The untimed call above runs first, so this timing excludes first-call compilation.
@time corr("specdata");

# corr2(directory, threshold = 0; ids = 1:332)
#
# Same contract as `corr`, but the files are read with `readcsv`. Missing
# values are therefore recognised as the literal string "NA" in column 2 or 3,
# and every kept cell is converted with `float`.
#
# Returns a Vector{Float64} with one correlation per file that has more than
# `threshold` complete rows.
function corr2(directory, threshold = 0; ids = 1:332)
    pcorr = Array(Float64, 0)
    for i in ids
        # has_header=true makes readcsv return (data, header); only data is used.
        data = readcsv(@sprintf("%s/%03d.csv", directory, i), has_header=true)[1]
        v2 = Array(Float64, 0)
        v3 = Array(Float64, 0)
        for j = 1:size(data, 1)
            if data[j, 2] != "NA" && data[j, 3] != "NA"
                push!(v2, float(data[j, 2]))
                push!(v3, float(data[j, 3]))
            end
        end
        length(v2) > threshold && push!(pcorr, cor(v2, v3))
    end
    return pcorr
end

pcorr = corr2("specdata", 400)
@time corr2("specdata");

# corr3(directory, threshold = 0; ids = 1:332)
#
# Same contract as `corr`, but each file is read with `readdlm` using its
# default delimiter, and every record is then split on ',' by hand.
# NOTE(review): this relies on readdlm handing back each record as a single
# string cell, e.g. "2004-06-24",NA,NA,1 -- confirm against the data files.
#
# Fields 2 and 3 of each record are the two values to correlate; a record is
# skipped when either of them is the literal "NA".
#
# Returns a Vector{Float64} with one correlation per file that has more than
# `threshold` complete rows.
function corr3(directory, threshold = 0; ids = 1:332)
    pcorr = Array(Float64, 0)
    for i in ids
        rows = readdlm(@sprintf("%s/%03d.csv", directory, i), has_header=true)[1]
        v2 = Array(Float64, 0)
        v3 = Array(Float64, 0)
        for j = 1:size(rows, 1)
            # Splitting on ',' replaces the fixed column offset and manual comma
            # scan, so the parse no longer depends on the width of the first field.
            fields = split(rows[j], ',')
            if fields[2] != "NA" && fields[3] != "NA"
                push!(v2, float(fields[2]))
                push!(v3, float(fields[3]))
            end
        end
        length(v2) > threshold && push!(pcorr, cor(v2, v3))
    end
    return pcorr
end

pcorr = corr3("specdata", 400)
@time corr3("specdata");