Julia is a high-level dynamic programming language designed to address the requirements of high-performance numerical and scientific computing while also being effective for general purpose programming.
Julia's core is implemented in C and C++, its parser in Scheme, and the LLVM compiler framework is used for just-in-time generation of machine code for x86(-64).
Development began in 2009, open-sourced in February 2012
Currently has 250+ contributors to the language, 400+ overall
Stable release: v0.2.1 (2014/02/11), pre-release: v0.3 (nightly build)
Source: GitHub, 2014/06/30 - Source code by Jiahao Chen
Which numeric type should you use? That depends on your use case — Julia provides a rich tree of them:
subtypes(Number)
2-element Array{Any,1}: Complex{T<:Real} Real
subtypes(Real)
4-element Array{Any,1}: FloatingPoint Integer MathConst{sym} Rational{T<:Integer}
subtypes(Integer)
5-element Array{Any,1}: BigInt Bool Char Signed Unsigned
# Floating Point
@show 5/3
# Mathematical Constant
@show pi
# Rational
@show 2//3 + 1
# BigInt
@show big(2) ^ 1000 ;
5 / 3 => 1.6666666666666667 pi => π = 3.1415926535897... 2 // 3 + 1 => 5//3 big(2)^1000 => 10715086071862673209484250490600018105614048117055336074437503883703510511249361224931983788156958581275946729175531468251871452856923140435984577574698574803934567774824230985421074605062371141877954182153046474983581941267398767559165543946077062914571196477686542167660429831652624386837205668069376
subtypes(String)
8-element Array{Any,1}: DirectIndexString GenericString RepString RevString{T<:String} RopeString SubString{T<:String} UTF16String UTF8String
s = "Hello World"
@show typeof(s)
@show s[7] ;
typeof(s) => ASCIIString s[7] => 'W'
# Unicode Names and Values
你好 = "(。◕_◕。)ノ "
@show typeof(你好)
@show 你好 ^ 3 ;
typeof(你好) => UTF8String 你好^3 => "(。◕_◕。)ノ (。◕_◕。)ノ (。◕_◕。)ノ "
# NewType: a minimal user-defined composite type for the dispatch demos below.
# (Pre-1.0 `type` keyword — equivalent to a modern `mutable struct`.)
# NOTE(review): the fields are abstractly typed (Integer/String); idiomatic Julia
# would parametrize for performance, but this matches the tutorial's 0.2/0.3 era style.
type NewType
i::Integer
s::String
end
new_t = NewType(33, "this is a NewType")
@show new_t.i
@show new_t.s ;
new_t.i => 33 new_t.s => "this is a NewType"
# Vectors
v = [1, 1]
2-element Array{Int64,1}: 1 1
# Vector Operations
@show v + [2, 0] # vector addition
@show v + 1 # same as v + [1,1]
@show 5*v # scalar multiplication
v + [2,0] => [3,1] v + 1 => [2,2] 5v => [5,5]
2-element Array{Int64,1}: 5 5
println( "Dot Product : ", dot(v, v) )
println( "Norm : ", norm(v) )
Dot Product : 2 Norm : 1.4142135623730951
# Matrices
M = [1 1 ; 0 1]
2x2 Array{Int64,2}: 1 1 0 1
# Matrix Addition
M + 1 ,
M + [0 0 ; 5 5]
( 2x2 Array{Int64,2}: 2 2 1 2, 2x2 Array{Int64,2}: 1 1 5 6)
# Matrix Multiplication
2M ,
M ^ 2 ,
M * v
( 2x2 Array{Int64,2}: 2 2 0 2, 2x2 Array{Int64,2}: 1 2 0 1, [2,1])
# Gaussian Elimination
b = M * v
M \ b # solve back for v
2-element Array{Float64,1}: 1.0 1.0
# Named functions: the compact assignment form and the full `function` form
# define the same kind of generic function.
f(x) = 10x          # one-line ("assignment") definition
function g(x)       # equivalent multi-line definition
    return 10x
end
@show f(5)
@show g(5) ;
f(5) => 50 g(5) => 50
# Anonymous functions assigned to variables: two equivalent syntaxes.
h = function(x)     # `function` keyword form, no name
    10x
end
i = x -> 10x        # arrow form
@show h(5)
@show i(5) ;
h(5) => 50 i(5) => 50
# Operators are functions
+(4,5)
9
p = +
p(2,3)
5
# `bar` demonstrates multiple dispatch: one generic function,
# one method per argument type.
function bar(x::String)
    println("You entered the string: $x")
end
function bar(x::Integer)
    return 10x
end
function bar(x::NewType)    # NewType is defined earlier in this file
    println(x.s)
end
methods(bar)
bar("Hello")
bar(new_t)
bar(5)
You entered the string: Hello this is a NewType
50
# Adding strings
"Hello" + "World"
`+` has no method matching +(::ASCIIString, ::ASCIIString) while loading In[22], in expression starting on line 3
# But the addition operator is a function, so we can apply multi-dispatch
# NOTE(review): extending `+` for two Base-owned types is type piracy — fine as a
# classroom demo, unwise in library code. Pre-0.4 Julia let a bare `+(...)= ...`
# extend Base.+ directly; modern Julia would require `Base.:+` or `import Base: +`.
+(a::String, b::String) = a * b
"Hello" + "World"
"HelloWorld"
# Mixed Number/String addition, defined in terms of the String+String method
# above by converting the numeric side with `string(...)`.
+(a::Number, b::String) = string(a) + b
+(a::String, b::Number) = a + string(b)
99 + "bottles"
"99bottles"
# Method Overloading
# Emulates class-style OO in Julia: the "method" is a closure stored in a field.
# `setter` is a single generic function with two methods, so `obj.set(...)`
# dispatches on the argument type — Integer vs String.
type SimpleObject
# Pre-0.4 call-form Union syntax; modern Julia writes Union{Integer, String}.
data::Union(Integer, String)
set::Function
# Inner constructor: builds the instance, then wires the closure into it.
function SimpleObject()
this = new()
this.data = ""
# Closure capturing `this`; method for integer arguments.
function setter(x::Integer)
println("Setting an integer")
this.data = x
end
# Second method of the same closure; dispatches for string arguments.
function setter(x::String)
println("Setting a string")
this.data = x
end
this.set = setter
return this
end
end
obj = SimpleObject()
obj.set(99)
obj.set("hello")
Setting an integer Setting a string
"hello"
# Sum of the odd integers between 1 and 5, written as a map/filter/reduce pipeline.
values = 1:5                               # input range
myMapper = function(x) x end               # identity map (pipeline placeholder)
myFilter = function(x) x % 2 == 1 end      # pass odd values through
myReducer = function(x, y) x + y end       # fold by addition
mapped = map(myMapper, values)
filtered = filter(myFilter, mapped)
reduced = reduce(myReducer, filtered)
9
# Code Generation
# Functions for exponentiating to the powers of 1 to 5
# Builds the source text of each definition, parses it into an Expr, and `eval`s
# it into the current module, defining power1..power5.
# NOTE(review): string -> parse -> eval is a metaprogramming anti-pattern for real
# code (an interpolated `@eval power$n(x) = x ^ $n` would be preferred); kept here
# as a demo. `parse(::String)` is the 0.2/0.3-era API — `Meta.parse` in modern Julia.
for n in 1:5
s = "power$n(x) = x ^ $n"
println(s)
expression = parse(s)
eval(expression)
end
power5( 2 )
power1(x) = x ^ 1 power2(x) = x ^ 2 power3(x) = x ^ 3 power4(x) = x ^ 4 power5(x) = x ^ 5
32
# Macros: Crude Timer Example
#
# `@timeit expr` evaluates `expr`, prints the wall-clock time the evaluation
# took, and evaluates to the expression's result.
#
# Fixes vs. the original:
#  * the quoted block ended with `return result`, which splices a literal
#    `return` into the CALL site — illegal at top level, and inside a function
#    it would return from the caller, not just yield the value. The block now
#    simply ends with `result`, so the macro expands to an expression.
#  * the user expression is interpolated as `$(esc(expression))` so that it is
#    resolved in the caller's scope (hygiene would otherwise rename/relocate it).
macro timeit(expression)
    quote
        t = time()                    # hygienic local: cannot clash with caller vars
        result = $(esc(expression))   # evaluate the user expression in caller scope
        elapsed = time() - t
        println("elapsed time: ", elapsed)
        result                        # value of the whole block
    end
end
@timeit cos(2pi)
@timeit cos(2pi)
elapsed time: 0.005074977874755859 elapsed time: 4.0531158447265625e-6
1.0
using StatsBase
x = rand(100) # uniform distribution [0,1)
println( "mean: ", mean(x) )
println( "variance: ", var(x) )
println( "skewness: ", skewness(x) )
println( "kurtosis: ", kurtosis(x) )
mean: 0.5260291483830464 variance: 0.09076375466564988 skewness: 0.007890698485229702 kurtosis: -1.3205549430417554
describe(x)
Summary Stats: Mean: 0.526029 Minimum: 0.002137 1st Quartile: 0.287252 Median: 0.495243 3rd Quartile: 0.809833 Maximum: 0.995268
using Distributions
distr = Normal(0, 2)
println( "pdf @ origin = ", pdf(distr, 0.0) )
println( "cdf @ origin = ", cdf(distr, 0.0) )
pdf @ origin = 0.19947114020071635 cdf @ origin = 0.5
x = rand(distr, 1000)
fit_mle(Normal, x)
Normal( μ=-0.010365910392224809 σ=2.0295200120189767 )
using DataFrames
df = DataFrame(
A = [6, 3, 4],
B = ["a", "b", "c"],
C = [1//2, 3//4, 5//6],
D = [true, true, false]
)
df[:C][2] = NA
df
A | B | C | D | |
---|---|---|---|---|
1 | 6 | a | 1//2 | true |
2 | 3 | b | NA | true |
3 | 4 | c | 5//6 | false |
# Joins
names = DataFrame(ID = [5, 4], Name = ["Jack", "Jill"])
jobs = DataFrame(ID = [5, 4], Job = ["Lawyer", "Doctor"])
full = join(names, jobs, on = :ID)
ID | Name | Job | |
---|---|---|---|
1 | 4 | Jill | Doctor |
2 | 5 | Jack | Lawyer |
using RDatasets
iris = dataset("datasets", "iris")
head(iris)
SepalLength | SepalWidth | PetalLength | PetalWidth | Species | |
---|---|---|---|---|---|
1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
# Group by Species, then compute mean of PetalLength per group
by( iris, :Species, df -> mean(df[:PetalLength]) )
Species | x1 | |
---|---|---|
1 | setosa | 1.462 |
2 | versicolor | 4.26 |
3 | virginica | 5.552 |
using ASCIIPlots
x = iris[:PetalLength]
y = iris[:PetalWidth]
scatterplot(x, y)
------------------------------------------------------------- | ^ ^ | 2.50 | ^ ^ | | ^ ^^^ ^ ^^ ^| | ^ ^ ^ | | ^^ ^ ^^ ^ ^ ^^ ^ | | ^ ^ ^ | | ^^^ ^ ^ ^ ^ | | ^ ^ | | ^ ^ ^ ^^ ^ | | ^ ^^ ^^ ^ | | ^ ^ ^^^^ | | ^ ^ ^ ^ ^ | | ^ ^ ^ ^^ ^ | | | | | | | | ^ | | ^ ^^ ^ | | ^ ^^ | |^^ ^ ^^ ^ | 0.10 ------------------------------------------------------------- 1.00 6.90
using Winston
scatter(x, y, ".")
xlabel("PetalLength")
ylabel("PetalWidth")
using Gadfly
set_default_plot_size(20cm, 12cm)
plot(iris, x = "PetalLength", y = "PetalWidth", color = "Species", Geom.point)
# K-means Clustering
using Clustering
features = array(iris[:, 1:4])' # use matrix() on Julia v0.2
result = kmeans( features, 3 ) # onto 3 clusters
plot(iris, x = "PetalLength", y = "PetalWidth", color = result.assignments, Geom.point)
Warning: using DataFrames.describe in module Main conflicts with an existing identifier. Warning: could not import StatsBase.bandwidth into Stat Warning: could not import StatsBase.kde into Stat
Iters objv objv-change | affected ------------------------------------------------------------- 1 8.200215e+01 -6.279785e+01 | 2 2 8.108093e+01 -9.212131e-01 | 2 3 7.987358e+01 -1.207354e+00 | 2 4 7.934436e+01 -5.292157e-01 | 2 5 7.892131e+01 -4.230544e-01 | 2 6 7.885567e+01 -6.564390e-02 | 0 7 7.885567e+01 0.000000e+00 | 0 K-means converged with 7 iterations (objv = 78.85566582597716)
# Principal Component Analysis
using MultivariateStats
pc = fit(PCA, features; maxoutdim = 2)
reduced = transform(pc, features)
@show size(reduced)
plot(iris, x = reduced[1,:], y = reduced[2,:], color = "Species", Geom.point)
size(reduced) => (2,150)
using MultivariateStats
# Generate a noisy linear system
features = rand(1000, 3) # feature matrix
coeffs = rand(3) # ground truth of weights
targets = features * coeffs + 0.1 * randn(1000) # generate response
# Linear Least Square Regression
coeffs_llsq = llsq(features, targets; bias=false)
# Ridge Regression
coeffs_ridge = ridge(features, targets, 0.1; bias=false) # regularization coef = 0.1
@show coeffs
@show coeffs_llsq
@show coeffs_ridge ;
coeffs => [0.909725259136879,0.09457886909449087,0.5497737690044144] coeffs_llsq => [0.9136892428062334,0.09038032773513839,0.5584636569649853] coeffs_ridge => [0.9131715345035267,0.09081871314618502,0.5583822928501584]
# Cross Validation: K-Fold Example
using MLBase, MultivariateStats
n = length(targets)
# Define training and error evaluation functions
# Fit ridge-regression coefficients on the rows selected by `inds`.
# Reads the file-level `features` matrix and `targets` vector; `ridge` comes
# from MultivariateStats, with a fixed regularization coefficient of 0.1 and
# no bias term (so the return is just the weight vector).
function training(inds)
coeffs = ridge(features[inds, :], targets[inds], 0.1; bias=false)
return coeffs
end
# Root-mean-square error of the predictions made with `coeffs` on the rows
# selected by `inds`. Reads the file-level `features` and `targets`.
function error_evaluation(coeffs, inds)
    predicted = features[inds, :] * coeffs
    return sqrt(mean(abs2(targets[inds] .- predicted)))
end
# Cross validate
scores = cross_validate(
inds -> training(inds),
(coeffs, inds) -> error_evaluation(coeffs, inds),
n, # total number of samples
Kfold(n, 3)) # cross validation plan: 3-fold
# Get the mean and std of scores
@show scores
@show mean_and_std(scores) ;
scores =>
Warning: using MLBase.transform in module Main conflicts with an existing identifier.
[0.09242034479832754,0.10521231254886307,0.10191139455606114] mean_and_std(scores) => (0.0998480173010839,0.006640915148151889)
# Model Tuning: Grid Search
using MLBase, MultivariateStats
# Hold out 20% of records for testing
n_test = int(length(targets) * 0.2)
train_rows = shuffle([1:length(targets)] .> n_test)
features_train, features_test = features[train_rows, :], features[!train_rows, :]
targets_train, targets_test = targets[train_rows], targets[!train_rows]
# Define estimation function
# Fit ridge regression on the training split for a given regularization
# coefficient. Returns (weight vector, bias term); when `bias` is false the
# bias term is reported as 0.0, when true it is the last fitted coefficient.
function estfun(regcoef, bias)
    w = ridge(features_train, targets_train, regcoef; bias=bias)
    if bias
        return (w[1:end-1], w[end])
    else
        return (w, 0.0)
    end
end
# Define error evaluation function as mean squared deviation
# Score a fitted (weights, bias) pair by the mean squared deviation of its
# predictions on the held-out test split (`msd` from MLBase).
function evalfun(coeffs)
    return msd(features_test * coeffs[1] + coeffs[2], targets_test)
end
result = gridtune(estfun, evalfun,
("regcoef", [0.01, 0.1, 1.0]),
("bias", [true, false]);
ord=Reverse, # smaller msd value indicates better model
verbose=true) # show progress information
best_model, best_config, best_score = result
# Print results
coeffs, bias = best_model
println("Best model:")
println(" coeffs = $(coeffs')"),
println(" bias = $bias")
println("Best config: regcoef = $(best_config[1]), bias = $(best_config[2])")
println("Best score: $(best_score)")
[regcoef=0.01, bias=true] => 0.011858694727414574 [regcoef=0.1, bias=true] => 0.011850442117052462 [regcoef=1.0, bias=true] => 0.011787552735182201 [regcoef=0.01, bias=false] => 0.011804804972495204 [regcoef=0.1, bias=false] => 0.011801611222973227 [regcoef=1.0, bias=false] => 0.011777881905009186 Best model: coeffs = [0.9133644779181795 0.09101416771441581 0.5528567682069362] bias = 0.0 Best config: regcoef = 1.0, bias = false Best score: 0.011777881905009186
# Regression Tree
using DecisionTree
# Train model, make predictions on test records
model = build_tree(targets_train, features_train)
predictions = apply_tree(model, features_test)
@show cor(targets_test, predictions)
@show R2(targets_test, predictions)
scatter(targets_test, predictions, ".")
xlabel("actual"); ylabel("predicted")
cor(targets_test,predictions) => 0.9062575173707162 R2(targets_test,predictions) => 0.8095402909108701
# Support Vector Machine
using LIBSVM
features = array(iris[:, 1:4])
labels = array(iris[:Species])
# Hold out 20% of records for testing
n_test = int(length(labels) * 0.2)
train_rows = shuffle([1:length(labels)] .> n_test)
features_train, features_test = features[train_rows, :], features[!train_rows, :]
labels_train, labels_test = labels[train_rows], labels[!train_rows]
model = svmtrain(labels_train, features_train')
(predictions, decision_values) = svmpredict(model, features_test')
confusion_matrix(labels_test, predictions)
Classes: ASCIIString["setosa","versicolor","virginica"] Matrix: 3x3 Array{Int64,2}: 15 0 0 0 9 1 0 0 5 Accuracy: 0.9666666666666667 Kappa: 0.9459459459459458
# Random Forest
using DecisionTree
# Train forest using 2 random features per split and 10 trees
model = build_forest(labels_train, features_train, 2, 10)
predictions = apply_forest(model, features_test)
# Pretty print of one tree in forest
print_tree(model.trees[1])
confusion_matrix(labels_test, predictions)
Feature 4, Threshold 1.0 L-> setosa : 25/25 R-> Feature 1, Threshold 6.3 L-> Feature 3, Threshold 4.8 L-> versicolor : 25/25 R-> Feature 2, Threshold 2.7 L-> virginica : 2/2 R-> Feature 4, Threshold 1.8 L-> versicolor : 2/2 R-> Feature 3, Threshold 4.9 L-> Feature 1, Threshold 6.2 L-> versicolor : 1/1 R-> virginica : 1/1 R-> virginica : 3/3 R-> Feature 1, Threshold 6.9 L-> Feature 3, Threshold 5.0 L-> versicolor : 6/6 R-> virginica : 10/10 R-> virginica : 9/9
Classes: {"setosa","versicolor","virginica"} Matrix: 3x3 Array{Int64,2}: 15 0 0 0 8 2 0 0 5 Accuracy: 0.9333333333333333 Kappa: 0.8928571428571429