How to solve semidefinite optimization problems in Julia
Shuvomoy Das Gupta
January 1, 2021
In this blog post, we discuss how to solve semidefinite programs (SDPs) in Julia using JuMP. We will consider two examples: (i) a standard-form semidefinite program, and (ii) a somewhat more complicated semidefinite program.
Standard form SDP
We consider optimization problems of the form:

$$
\begin{array}{ll}
\textup{minimize} & \operatorname{tr}(CX) \\
\textup{subject to} & \operatorname{tr}(A_i X) = b_i, \quad i = 1, \ldots, m, \\
& X \succeq 0,
\end{array}
$$

where $X \in \mathbf{S}^n$ is the decision variable, and each of the matrices $C$ and $A_1, \ldots, A_m$ is also in $\mathbf{S}^n$. By the notation $\mathbf{S}^n$, we denote the set of all $n \times n$ symmetric matrices.
First, we load the necessary Julia packages.
using SCS, COSMO, MosekTools, JuMP, LinearAlgebra
using BenchmarkTools

Let us create the data randomly.
function random_mat_create(n)
    # this function creates a random symmetric positive semidefinite n×n matrix
    A = randn(n, n)
    A = A'*A
    # symmetrize explicitly to guard against floating-point asymmetry
    A = (A + A')/2
    return A
end

Here is the data generation process; please adapt it to your needs.
n = 10
m = 20
# set of all data matrices A_i
# the data matrix A = [A1 A2 A3 ....]
A = zeros(n, m*n)
b = zeros(m)
# generate b from a known PSD matrix X_test, so the problem is guaranteed feasible
X_test = rand(n, n)
X_test = X_test'*X_test
X_test = (X_test + X_test')/2
for i in 1:m
    A[:, (i-1)*n+1:i*n] .= random_mat_create(n)
    b[i] = tr(A[:, (i-1)*n+1:i*n]*X_test)
end
C = abs.(random_mat_create(n))

The following function solves the underlying SDP.
function solve_SDP(A, b, C; solver_name=:COSMO)
    # infer the problem dimensions from the data (rather than relying on globals)
    n = size(A, 1)
    m = length(b)
    # create the model with the chosen solver
    if solver_name == :COSMO
        model = Model(COSMO.Optimizer)
    elseif solver_name == :Mosek
        model = Model(Mosek.Optimizer)
    else
        error("unknown solver_name: $solver_name")
    end
    set_silent(model)
    # X is the n×n symmetric positive semidefinite decision variable
    @variable(model, X[1:n, 1:n], PSD)
    @objective(model, Min, tr(C * X))
    # add the linear equality constraints tr(A_j X) = b_j
    for j in 1:m
        A_j = A[:, (j - 1) * n + 1:j * n]
        @constraint(model, tr(A_j * X) == b[j])
    end
    optimize!(model)
    status = JuMP.termination_status(model)
    X_sol = JuMP.value.(X)
    obj_value = JuMP.objective_value(model)
    return status, X_sol, obj_value
end

Time to solve the problem.
status, X_sol, obj_value = solve_SDP(A, b, C; solver_name=:Mosek)
out: (MathOptInterface.OPTIMAL, [2.907044311952373 1.7130367276575142 … -0.056145513617222656 3.0230926674218024; 1.7130367276575142 3.419039624378557 … 1.0871948703965775 2.0577919984154334; … ; -0.056145513617222656 1.0871948703965775 … 0.7127834057861842 0.5195987956934747; 3.0230926674218024 2.0577919984154334 … 0.5195987956934747 4.234108180247669], 939.2696385581793)
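As a quick sanity check (this snippet is my addition, assuming the data A, b, C and the solution X_sol from above), we can verify that X_sol is numerically positive semidefinite and satisfies the equality constraints up to solver tolerance.

# sanity check: X_sol should be (numerically) PSD,
# and the constraints tr(A_j X_sol) = b_j should hold up to solver tolerance
println("smallest eigenvalue of X_sol = ", minimum(eigvals(Symmetric(X_sol))))
println("largest constraint violation = ",
    maximum(abs(tr(A[:, (j-1)*n+1:j*n] * X_sol) - b[j]) for j in 1:m))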
Let's see which solver is faster: COSMO or Mosek.
b1 = @benchmark solve_SDP(A, b, C; solver_name=:COSMO)
println("benchmark for COSMO")
println("*************************")
io = IOBuffer()
show(io, "text/plain", b1)
s = String(take!(io))
println(s)
benchmark for COSMO
*************************
BenchmarkTools.Trial:
memory estimate: 4.33 MiB
allocs estimate: 35786
--------------
minimum time: 36.964 ms (0.00% GC)
median time: 40.552 ms (0.00% GC)
mean time: 40.787 ms (1.14% GC)
maximum time: 46.920 ms (9.97% GC)
--------------
samples: 123
evals/sample: 1
b2 = @benchmark solve_SDP(A, b, C; solver_name=:Mosek)
println("benchmark for Mosek")
println("***************************")
io = IOBuffer()
show(io, "text/plain", b2)
s = String(take!(io))
println(s)
benchmark for Mosek
***************************
BenchmarkTools.Trial:
memory estimate: 3.81 MiB
allocs estimate: 32015
--------------
minimum time: 6.647 ms (0.00% GC)
median time: 7.524 ms (0.00% GC)
mean time: 8.374 ms (5.00% GC)
maximum time: 19.410 ms (25.35% GC)
--------------
samples: 597
evals/sample: 1
So, going by the median times, Mosek is about five times faster than COSMO on this problem.
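We also loaded SCS at the top; if we want to include this open-source solver in the comparison, here is a minimal sketch along the same lines (the function solve_SDP_SCS is my addition, not part of the original code). Being a first-order solver, SCS typically trades some accuracy for speed on problems like this.

# a sketch: solve the same SDP with SCS and benchmark it
function solve_SDP_SCS(A, b, C)
    n = size(A, 1)
    m = length(b)
    model = Model(SCS.Optimizer)
    set_silent(model)
    @variable(model, X[1:n, 1:n], PSD)
    @objective(model, Min, tr(C * X))
    for j in 1:m
        @constraint(model, tr(A[:, (j-1)*n+1:j*n] * X) == b[j])
    end
    optimize!(model)
    return termination_status(model), value.(X), objective_value(model)
end
b3 = @benchmark solve_SDP_SCS(A, b, C)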
A more complicated SDP
Denote

$$\mathbf{w}_0 = e_1 \in \mathbf{R}^{N+2}, \quad \mathbf{w}_\star = 0 \in \mathbf{R}^{N+2}, \quad \mathbf{g}_\star = 0 \in \mathbf{R}^{N+2}, \quad \mathbf{g}_i = e_{i+2} \in \mathbf{R}^{N+2} \; (i = 0, 1, \ldots, N),$$

and

$$\mathbf{f}_\star = 0 \in \mathbf{R}^{N+1}, \quad \mathbf{f}_i = e_{i+1} \in \mathbf{R}^{N+1} \; (i = 0, 1, \ldots, N),$$

where $e_i$ is the unit vector with the $i$th component equal to $1$ and the rest being zero. Next, define the symmetric outer product

$$a \odot b = \frac{ab^\top + ba^\top}{2},$$

where $a$ and $b$ are vectors of equal length. Then, the performance estimation optimization problem is:

$$
\begin{array}{ll}
\textup{minimize} & \tau \\[4pt]
\textup{subject to} & \tau c_f \, \mathbf{f}_0 + \displaystyle\sum_{i=0}^{N-1} \lambda_{i,i+1} (\mathbf{f}_{i+1} - \mathbf{f}_i) + \displaystyle\sum_{i=0}^{N} \lambda_{\star,i} (\mathbf{f}_i - \mathbf{f}_\star) = \mathbf{f}_N - \mathbf{f}_\star, \\[4pt]
& c_w \tau \, (\mathbf{w}_0 \odot \mathbf{w}_0) + \dfrac{1}{2L} \left( \displaystyle\sum_{i=0}^{N-1} \lambda_{i,i+1} (\mathbf{g}_i - \mathbf{g}_{i+1}) \odot (\mathbf{g}_i - \mathbf{g}_{i+1}) + \displaystyle\sum_{i=0}^{N} \lambda_{\star,i} (\mathbf{g}_\star - \mathbf{g}_i) \odot (\mathbf{g}_\star - \mathbf{g}_i) \right) \\[4pt]
& \quad {} - \lambda_{\star,0} \, (\mathbf{g}_0 \odot \mathbf{w}_0) - \displaystyle\sum_{i=1}^{N-1} \lambda_{i,i+1} \, (\mathbf{g}_i \odot \mathbf{w}_0) + \dfrac{1}{L} \displaystyle\sum_{i=1}^{N-1} \sum_{j=0}^{i-1} \alpha'_{i,j} \, (\mathbf{g}_i \odot \mathbf{g}_j) \\[4pt]
& \quad {} - \left( \mathbf{g}_N \odot \mathbf{w}_0 - \dfrac{1}{L} \displaystyle\sum_{j=0}^{N-1} \alpha'_{N,j} \, (\mathbf{g}_N \odot \mathbf{g}_j) \right) + \displaystyle\sum_{i=0}^{N-1} \lambda_{i,i+1} \, (\mathbf{g}_{i+1} \odot \mathbf{w}_0) - \dfrac{1}{L} \displaystyle\sum_{i=1}^{N-1} \sum_{j=0}^{i-1} \alpha'_{i,j} \, (\mathbf{g}_{i+1} \odot \mathbf{g}_j) \succeq 0, \\[4pt]
& \lambda_{i,i+1} \geq 0 \; (i = 0, \ldots, N-1), \quad \lambda_{\star,i} \geq 0 \; (i = 0, \ldots, N), \quad \tau \geq 0,
\end{array}
$$

where the decision variables are $\tau \in \mathbf{R}$, $\{\lambda_{i,i+1}\}_{i \in [0:N-1]}$, $\{\lambda_{\star,i}\}_{i \in [0:N]}$, and $\{\alpha'_{i,j}\}_{i \in [1:N],\, j \in [0:i-1]}$.
# Load the packages
using JuMP, MosekTools, Mosek, LinearAlgebra, SCS, COSMO, Literate, OffsetArrays
# %% Some helper functions
# %% construct e_i in R^n
function e_i(n, i)
    # the i-th standard unit vector in R^n, returned as an n×1 matrix
    e_i_vec = zeros(n, 1)
    e_i_vec[i] = 1
    return e_i_vec
end
# %% construct symmetric outer product
function ⊙(a, b)
    # a ⊙ b = (a*b' + b*a')/2
    return ((a*b') + (b*a')) ./ 2
end
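As a quick check of these helpers (a small snippet added for illustration), the symmetric outer product of two distinct unit vectors should have 1/2 in the corresponding off-diagonal entries.

# quick check: e_1 ⊙ e_2 in R^3 has 1/2 in entries (1,2) and (2,1), zeros elsewhere
M = ⊙(e_i(3, 1), e_i(3, 2))
@assert M == [0 0.5 0; 0.5 0 0; 0 0 0]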
# %% Parameters to be tuned
N = 5    # number of iterations
L = 1    # smoothness parameter
μ = 0    # strong convexity parameter (passed along but not used below)
c_w = 1  # weight on the 𝐰_0 ⊙ 𝐰_0 term in the LMI constraint
c_f = 0  # weight on the 𝐟_0 term in the linear equality constraint
Putting everything in a function

Now, let us put everything in a function (the required packages were already loaded above).
function full_pep_solver(N, L, μ, c_w, c_f)
    # define all the bold vectors
    # --------------------------
    # define 𝐰_0
    𝐰_0 = e_i(N+2, 1)
    𝐰_star = zeros(N+2, 1)
    𝐠_star = zeros(N+2, 1)
    # define 𝐠_0, 𝐠_1, …, 𝐠_N
    # we store them as columns of an OffsetArray so they can be indexed as 𝐠[:,0], …, 𝐠[:,N]
    𝐠 = OffsetArray(zeros(N+2, N+1), 1:N+2, 0:N)
    # 𝐠 = [𝐠_0 𝐠_1 𝐠_2 ... 𝐠_N]
    for i in 0:N
        𝐠[:,i] = e_i(N+2, i+2)
    end
    𝐟_star = zeros(N+1, 1)
    # time to define 𝐟_0, 𝐟_1, …, 𝐟_N
    𝐟 = OffsetArray(zeros(N+1, N+1), 1:N+1, 0:N)
    # 𝐟 = [𝐟_0, 𝐟_1, …, 𝐟_N]
    for i in 0:N
        𝐟[:,i] = e_i(N+1, i+1)
    end
    # Define the JuMP model
    # ---------------------
    pep_model = Model(optimizer_with_attributes(Mosek.Optimizer, "MSK_DPAR_INTPNT_CO_TOL_PFEAS" => 1e-10))
    # define all the variables
    # ------------------------
    # define α′ (can be typed as \alpha[TAB]\prime[TAB])
    @variable(pep_model, α′[1:N, 0:N-1])
    # define λ variables
    @variable(pep_model, λ_i_ip1[0:N-1] >= 0)
    # this defines (λ_{i,i+1})_{i∈[0:N-1]} in Julia indexing
    @variable(pep_model, λ_star_i[0:N] >= 0)
    # this defines (λ_{⋆,i})_{i∈[0:N]} in Julia indexing
    # define τ
    @variable(pep_model, τ >= 0)
    # define the objective
    # --------------------
    @objective(pep_model, Min, τ)
    # Add the linear equality constraint
    # ----------------------------------
    @constraint(pep_model,
        τ * c_f .* 𝐟[:,0] + sum(λ_i_ip1[i] .* (𝐟[:,i+1]-𝐟[:,i]) for i in 0:N-1)
        + sum(λ_star_i[i] .* (𝐟[:,i] - 𝐟_star) for i in 0:N)
        .== 𝐟[:,N] - 𝐟_star
    )
    # add the LMI constraint now
    @constraint(pep_model,
        # term_1:
        c_w * τ * ⊙(𝐰_0,𝐰_0) +
        # term_2 (part 1 + part 2):
        (sum(λ_i_ip1[i] .* ⊙(𝐠[:,i]-𝐠[:,i+1],𝐠[:,i]-𝐠[:,i+1]) for i in 0:N-1) +
        sum(λ_star_i[i] .* ⊙(𝐠_star - 𝐠[:,i],𝐠_star - 𝐠[:,i]) for i in 0:N) ) ./ (2*L) +
        # term_3:
        (- λ_star_i[0] .* ⊙(𝐠[:,0],𝐰_0) ) +
        # term_4 part 1:
        (- sum( λ_i_ip1[i] .* ⊙(𝐠[:,i],𝐰_0) for i in 1:N-1)) +
        # term_4 part 2:
        (1/L) .* sum( sum(α′[i,j] .* ⊙(𝐠[:,i],𝐠[:,j]) for j in 0:i-1) for i in 1:N-1) +
        # term_5:
        (- (⊙(𝐠[:,N],𝐰_0) - (1/L) .* sum(α′[N,j] .* ⊙(𝐠[:,N],𝐠[:,j]) for j in 0:N-1))) +
        # term_6 part 1:
        sum(λ_i_ip1[i] .* ⊙(𝐠[:,i+1],𝐰_0) for i in 0:N-1) +
        # term_6 part 2:
        (- (1/L) .* sum( sum( α′[i,j] .* ⊙(𝐠[:,i+1],𝐠[:,j]) for j in 0:i-1) for i in 1:N-1))
        in PSDCone()
    )
    # time to optimize!
    # -----------------
    optimize!(pep_model)
    println("termination status = ", termination_status(pep_model))
    α′_opt = OffsetArray(value.(α′), 1:N, 0:N-1)
    λ_opt_i_ip1 = OffsetVector(value.(λ_i_ip1), 0:N-1)
    λ_opt_star_i = OffsetVector(value.(λ_star_i), 0:N)
    τ_opt = value(τ)
    # Recover α_{i,j} from α′_{i,j}
    # (α′_{i,j} = λ_{i,i+1} α_{i,j} for i ∈ [1:N-1], while the last row satisfies α′_{N,j} = α_{N,j})
    # -----------------------------
    α_opt = OffsetArray(zeros(size(α′_opt)), 1:N, 0:N-1)
    for i in 1:N-1
        for j in 0:i-1
            α_opt[i,j] = α′_opt[i,j] / λ_opt_i_ip1[i]
        end
    end
    for j in 0:N-1
        α_opt[N,j] = α′_opt[N,j]
    end
    # Recover h_{i,j} from α_{i,j}
    # (h_{i,i-1} = α_{i,i-1}, and h_{i,j} = α_{i,j} - α_{i-1,j} for j < i-1)
    # ----------------------------
    h_opt = OffsetArray(zeros(size(α_opt)), 1:N, 0:N-1)
    # set h[1,0]
    h_opt[1,0] = α_opt[1,0]
    # set the rest of the entries
    for i in 2:N
        for j in 0:i-1
            if j == i-1
                h_opt[i,j] = α_opt[i,j]
            else
                h_opt[i,j] = α_opt[i,j] - α_opt[i-1,j]
            end
        end
    end
    # return all the outputs
    # ----------------------
    return α′_opt, λ_opt_i_ip1, λ_opt_star_i, τ_opt, α_opt, h_opt
end
Time to run and test.
α′_opt, λ_opt_i_ip1, λ_opt_star_i, τ_opt, α_opt, h_opt = full_pep_solver(N, L, μ, c_w, c_f)
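To inspect the output (a small snippet added here for convenience), we can print the optimal objective value and the recovered stepsize coefficients.

# print the optimal objective value and the recovered stepsizes h_{i,j}
println("optimal τ = ", τ_opt)
for i in 1:N, j in 0:i-1
    println("h[$i,$j] = ", h_opt[i,j])
end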