using DataFrames
# Monitor identifiers 1 through 332 as an explicit vector.
# `collect` is used instead of `[1:332]` so the result does not depend on
# ranges being implicitly concatenated inside brackets.
ID = collect(1:332)
# Element count derived from byte sizes (total bytes / bytes of one element).
# The division yields a floating-point value, 332.0.
sizeof(ID) / sizeof(ID[1])
# output: 332.0
# Correlation between columns 2 and 3 of each monitor file, read with
# DataFrames' `readtable`.
#
# Arguments:
#   directory - folder holding the monitor files, named "001.csv", "002.csv", ...
#   threshold - a monitor contributes only when its count of complete rows
#               (neither column 2 nor column 3 is NA) is strictly greater
#               than this value
#   id        - monitor numbers to read; defaults to 1:332, the range that
#               used to be hard-coded
#
# Returns a Vector{Float64} with one correlation per qualifying monitor, in
# the order given by `id`.
function corr(directory, threshold = 0, id = 1:332)
    pcorr = Float64[]
    for i in id
        df = readtable(@sprintf("%s/%03d.csv", directory, i))
        v2 = Float64[]
        v3 = Float64[]
        for j = 1:size(df, 1)
            # keep the row only when both measurements are present
            if !isna(df[j, 2]) && !isna(df[j, 3])
                push!(v2, df[j, 2])
                push!(v3, df[j, 3])
            end
        end
        if length(v2) > threshold
            push!(pcorr, cor(v2, v3))
        end
    end
    return pcorr
end
# Correlations for every monitor; 0 is the default threshold, spelled out here.
pcorr = corr("specdata", 0)
# Time a second full pass over the same directory.
@time corr("specdata");
# elapsed time: 1.676303646 seconds (511259896 bytes allocated)
# Same computation as a per-monitor correlation of columns 2 and 3, but the
# files are read with `readcsv` instead of a data frame.
#
# Arguments:
#   directory - folder holding the monitor files, named "001.csv", "002.csv", ...
#   threshold - a monitor contributes only when its count of complete rows
#               (neither column 2 nor column 3 equals "NA") is strictly
#               greater than this value
#   id        - monitor numbers to read; defaults to 1:332, the range that
#               used to be hard-coded
#
# Returns a Vector{Float64} with one correlation per qualifying monitor, in
# the order given by `id`.
function corr2(directory, threshold = 0, id = 1:332)
    pcorr = Float64[]
    for i in id
        # the first element of the readcsv result is the data matrix;
        # unpack it once instead of re-indexing on every cell access
        data = readcsv(@sprintf("%s/%03d.csv", directory, i), has_header=true)[1]
        v2 = Float64[]
        v3 = Float64[]
        for j = 1:size(data, 1)
            # missing measurements appear as the literal string "NA"
            if data[j, 2] != "NA" && data[j, 3] != "NA"
                push!(v2, float(data[j, 2]))
                push!(v3, float(data[j, 3]))
            end
        end
        if length(v2) > threshold
            push!(pcorr, cor(v2, v3))
        end
    end
    return pcorr
end
# Correlations for monitors with more than 400 complete rows.
pcorr = corr2("specdata", 400)
# The timed run uses threshold 0 (the default), not 400.
@time corr2("specdata", 0);
# elapsed time: 1.157388875 seconds (374348556 bytes allocated)
# Per-monitor correlation of fields 2 and 3, parsing each row by hand.
#
# Each file is read with `readdlm` without a delimiter argument, and every
# row is then treated as one string such as
#     "2004-06-24",NA,NA,1
# in which the quoted date and its comma occupy characters 1-13, so the
# measurements start at character 14.
#
# Arguments:
#   directory - folder holding the monitor files, named "001.csv", "002.csv", ...
#   threshold - a monitor contributes only when its count of rows without
#               "NA" is strictly greater than this value
#   id        - monitor numbers to read; defaults to 1:332, the range that
#               used to be hard-coded
#
# Returns a Vector{Float64} with one correlation per qualifying monitor, in
# the order given by `id`.
function corr3(directory, threshold = 0, id = 1:332)
    pcorr = Float64[]
    for i in id
        rows = readdlm(@sprintf("%s/%03d.csv", directory, i), has_header=true)[1]
        v2 = Float64[]
        v3 = Float64[]
        for j = 1:size(rows, 1)
            line = rows[j]
            # skip the row if "NA" occurs anywhere after the date
            if ismatch(r"NA", line[14:end])
                continue
            end
            # Locate the commas that close fields 2 and 3. Stopping at the
            # second comma keeps field 3 intact even if more columns follow.
            c1 = search(line, ',', 14)
            c2 = search(line, ',', c1 + 1)
            push!(v2, float(line[14:c1-1]))
            push!(v3, float(line[c1+1:c2-1]))
        end
        if length(v2) > threshold
            push!(pcorr, cor(v2, v3))
        end
    end
    return pcorr
end
# Correlations for monitors with more than 400 complete rows.
pcorr = corr3("specdata", 400)
# The timed run uses threshold 0 (the default), not 400.
@time corr3("specdata", 0);
# elapsed time: 1.137150839 seconds (479843724 bytes allocated)