diff --git a/Project.toml b/Project.toml
index 026a29ba7..a514f9d23 100644
--- a/Project.toml
+++ b/Project.toml
@@ -42,45 +42,48 @@ Adapt = "4"
 AdvancedHMC = "0.6.1"
 Aqua = "0.8"
 ArrayInterface = "7.9"
-CUDA = "5.3"
+CUDA = "5.3.2"
 ChainRulesCore = "1.24"
-ComponentArrays = "0.15.14"
+ComponentArrays = "0.15.16"
 Cubature = "1.5"
 DiffEqNoiseProcess = "5.20"
 Distributions = "0.25.107"
 DocStringExtensions = "0.9.3"
 DomainSets = "0.6, 0.7"
-Flux = "0.14.11"
+Flux = "0.14.17"
 ForwardDiff = "0.10.36"
-Functors = "0.4.10"
+Functors = "0.4.12"
 Integrals = "4.4"
 LineSearches = "7.2"
-LinearAlgebra = "1"
+LinearAlgebra = "1.10"
 LogDensityProblems = "2"
-Lux = "0.5.58"
+Lux = "0.5.65"
 LuxCUDA = "0.3.2"
+LuxCore = "0.1.24"
+LuxLib = "0.3.48"
 MCMCChains = "6"
 MethodOfLines = "0.11"
 ModelingToolkit = "9.9"
 MonteCarloMeasurements = "1.1"
 Optim = "1.7.8"
-Optimization = "3.24, 4"
+Optimization = "3.25"
 OptimizationOptimJL = "0.2.1"
-OptimizationOptimisers = "0.2.1, 0.3"
-OrdinaryDiffEq = "6.74"
-Pkg = "1"
+OptimizationOptimisers = "0.2.1"
+OrdinaryDiffEq = "6.87"
+Pkg = "1.10"
+Preferences = "1.4.3"
 QuasiMonteCarlo = "0.3.2"
 Random = "1"
 Reexport = "1.2"
 RuntimeGeneratedFunctions = "0.5.12"
 SafeTestsets = "0.1"
-SciMLBase = "2.28"
+SciMLBase = "2.34"
 Statistics = "1.10"
 SymbolicUtils = "1.5, 2, 3"
 Symbolics = "5.27.1, 6"
-Test = "1"
+Test = "1.10"
 UnPack = "1"
-Zygote = "0.6.69"
+Zygote = "0.6.70"
 julia = "1.10"
 
 [extras]
@@ -89,12 +92,15 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
+LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
 MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "LuxCUDA", "Flux", "MethodOfLines"]
+test = ["Aqua", "CUDA", "Flux", "LineSearches", "LuxCUDA", "LuxCore", "LuxLib", "MethodOfLines", "OptimizationOptimJL", "OrdinaryDiffEq", "Pkg", "Preferences", "SafeTestsets", "Test"]
diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl
index 7105346aa..116478a51 100644
--- a/src/advancedHMC_MCMC.jl
+++ b/src/advancedHMC_MCMC.jl
@@ -109,7 +109,7 @@ function L2loss2(Tar::LogTargetDensity, θ)
     # parameter estimation chosen or not
     if Tar.extraparams > 0
         autodiff = Tar.autodiff
-        # Timepoints to enforce Physics
+        # Timepoints to enforce Physics
         t = Tar.dataset[end]
         u1 = Tar.dataset[2]
         û = Tar.dataset[1]
@@ -122,22 +122,23 @@ function L2loss2(Tar::LogTargetDensity, θ)
 
         if length(Tar.prob.u0) == 1
             physsol = [f(û[i],
-                ode_params,
-                t[i])
+                          ode_params,
+                          t[i])
                        for i in 1:length(û[:, 1])]
         else
             physsol = [f([û[i], u1[i]],
-                ode_params,
-                t[i])
+                          ode_params,
+                          t[i])
                        for i in 1:length(û)]
         end
-        #form of NN output matrix output dim x n
+        #form of NN output matrix output dim x n
         deri_physsol = reduce(hcat, physsol)
-
+
         physlogprob = 0
         for i in 1:length(Tar.prob.u0)
-            # can add phystd[i] for u[i]
-            physlogprob += logpdf(MvNormal(deri_physsol[i, :],
+            # can add phystd[i] for u[i]
+            physlogprob += logpdf(
+                MvNormal(deri_physsol[i, :],
                     LinearAlgebra.Diagonal(map(abs2,
                         (Tar.l2std[i] * 4.0) .*
                         ones(length(nnsol[i, :]))))),
@@ -353,7 +354,10 @@ NN OUTPUT AT t,θ ~ phi(t,θ).
 function (f::LogTargetDensity{C, S})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, t'))
+    y, st = f.chain(t_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y
 end
@@ -361,7 +365,10 @@ end
 function (f::LogTargetDensity{C, S})(t::Number,
         θ) where {C <: Lux.AbstractExplicitLayer, S}
     θ = vector_to_parameters(θ, f.init_params)
-    y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, [t]))
+    y, st = f.chain(t_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y
 end
@@ -574,7 +581,8 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         @info("Current Prior Log-likelihood : ", priorweights(ℓπ, initial_θ))
         @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, initial_θ))
         if estim_collocate
-            @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, initial_θ))
+            @info("Current gradient loss against dataset Log-likelihood : ",
+                L2loss2(ℓπ, initial_θ))
         end
 
     Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor],
@@ -628,7 +636,8 @@ function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain;
         @info("Current Prior Log-likelihood : ", priorweights(ℓπ, samples[end]))
         @info("Current MSE against dataset Log-likelihood : ", L2LossData(ℓπ, samples[end]))
         if estim_collocate
-            @info("Current gradient loss against dataset Log-likelihood : ", L2loss2(ℓπ, samples[end]))
+            @info("Current gradient loss against dataset Log-likelihood : ",
+                L2loss2(ℓπ, samples[end]))
         end
 
     # return a chain(basic chain),samples and stats
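Throughout this patch, the one-liner `adapt(parameterless_type(ComponentArrays.getdata(θ)), x)` becomes a two-step pattern: `adapt` still moves the input onto the parameters' container type (e.g. `Array` vs. a GPU array), and the added broadcast `convert` also matches the parameters' element type, which is exactly what the `eltype_mismatch_handling` check enabled in `test/runtests.jl` below flags. A minimal CPU-only sketch of the idea, assuming `parameterless_type` behaves like ArrayInterface's function of that name (ArrayInterface is already a dependency above):

```julia
using Adapt, ArrayInterface, ComponentArrays

θ = ComponentArray(weight = rand(Float32, 4))    # Float32 network parameters
θ_ = ComponentArrays.getdata(θ)                  # underlying Vector{Float32}
eltypeθ = eltype(θ_)                             # Float32
typeθ = ArrayInterface.parameterless_type(θ_)    # Array (container without parameters)

t = [0.0, 0.5, 1.0]                              # Float64 time points from the user
t_ = convert.(eltypeθ, adapt(typeθ, t'))         # 1×3 row matrix with eltype Float32

@assert eltype(t_) == eltype(θ_)   # no Float32/Float64 mixing inside the chain
```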
diff --git a/src/discretize.jl b/src/discretize.jl
index 9a40e0fe8..8acd77533 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -217,21 +217,21 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D
 
     bcs_train_sets = map(bound_args) do bt
         span = map(b -> get(dict_var_span, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        _set = hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)
+        x = convert.(eltypeθ, adapt(eltypeθ, _set))
     end
 
     pde_vars = get_variables(eqs, dict_indvars, dict_depvars)
     pde_args = get_argument(eqs, dict_indvars, dict_depvars)
 
-    pde_train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points),
-            Iterators.product(bc_data...)))...))
+    # pde_train_set = adapt(eltypeθ,
+    #     hcat(vec(map(points -> collect(points),
+    #         Iterators.product(bc_data...)))...))
 
     pde_train_sets = map(pde_args) do bt
         span = map(b -> get(dict_var_span_, b, b), bt)
-        _set = adapt(eltypeθ,
-            hcat(vec(map(points -> collect(points), Iterators.product(span...)))...))
+        _set = hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)
+        x = convert.(eltypeθ, adapt(eltypeθ, _set))
     end
     [pde_train_sets, bcs_train_sets]
 end
@@ -266,11 +266,11 @@ function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars,
 
     pde_lower_bounds = map(pde_args) do pd
         span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) + cbrt(eps(eltypeθ)), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)) + cbrt(eps(eltypeθ)), span)
     end
     pde_upper_bounds = map(pde_args) do pd
         span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s) - cbrt(eps(eltypeθ)), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)) - cbrt(eps(eltypeθ)), span)
     end
     pde_bounds = [pde_lower_bounds, pde_upper_bounds]
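For reference, the `hcat(vec(map(collect, Iterators.product(...)))...)` idiom that these hunks unpack from `adapt` builds the training set as a matrix with one column per grid point; the extra `convert.` pass then pins down its element type. A standalone illustration with arbitrarily chosen spans:

```julia
# Cartesian product of two coordinate spans, x ∈ 0:0.5:1 and y ∈ 0:1:1
spans = [0.0:0.5:1.0, 0.0:1.0:1.0]
grid = hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...)
# 2×6 Matrix{Float64}, each column one training point:
#  0.0  0.5  1.0  0.0  0.5  1.0
#  0.0  0.0  0.0  1.0  1.0  1.0

grid32 = convert.(Float32, grid)   # the convert.(eltypeθ, ...) step for Float32 params
```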
diff --git a/src/neural_adapter.jl b/src/neural_adapter.jl
index e54c6e818..ee69249e0 100644
--- a/src/neural_adapter.jl
+++ b/src/neural_adapter.jl
@@ -5,8 +5,8 @@ function generate_training_sets(domains, dx, eqs, eltypeθ)
         dxs = fill(dx, length(domains))
     end
     spans = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, dxs)]
-    train_set = adapt(eltypeθ,
-        hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...))
+    train_set = hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...)
+    x = convert.(eltypeθ, adapt(eltypeθ, train_set))
 end
 
 function get_loss_function_(loss, init_params, pde_system, strategy::GridTraining)
@@ -30,7 +30,7 @@ function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strateg
 
     bounds = first(map(args) do pd
         span = map(p -> get(dict_span, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)), span)
     end)
     bounds = [getindex.(bounds, 1), getindex.(bounds, 2)]
     return bounds
@@ -75,11 +75,11 @@ function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars,
 
     lower_bounds = map(args) do pd
         span = map(p -> get(dict_lower_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)), span)
    end
     upper_bounds = map(args) do pd
         span = map(p -> get(dict_upper_bound, p, p), pd)
-        map(s -> adapt(eltypeθ, s), span)
+        map(s -> convert(eltypeθ, adapt(eltypeθ, s)), span)
     end
     bound = lower_bounds, upper_bounds
 end
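The scalar domain bounds in `get_bounds_` above and `get_bounds` in `src/discretize.jl` get the same element-type treatment. This matters because the `cbrt(eps(eltypeθ))` offset used in `src/discretize.jl`, which keeps sampled points strictly inside the domain, scales with the working precision:

```julia
# The interior nudge is much wider in single precision:
cbrt(eps(Float64))   # ≈ 6.1e-6
cbrt(eps(Float32))   # ≈ 4.9e-3

# So a bound is first brought to the parameters' eltype, then nudged inward:
eltypeθ = Float32
lo = convert(eltypeθ, 0.0) + cbrt(eps(eltypeθ))   # ≈ 0.0049f0, a Float32
```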
diff --git a/src/ode_solve.jl b/src/ode_solve.jl
index bcf9c68eb..2b3ac4dfd 100644
--- a/src/ode_solve.jl
+++ b/src/ode_solve.jl
@@ -118,8 +118,10 @@ end
 
 function (f::ODEPhi{C, T, U})(t::Number,
         θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, [t]))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 + (t - f.t0) * first(y)
 end
@@ -127,15 +129,19 @@ end
 function (f::ODEPhi{C, T, U})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number}
     # Batch via data as row vectors
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, t'))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 .+ (t' .- f.t0) .* y
 end
 
 function (f::ODEPhi{C, T, U})(t::Number, θ) where {C <: Lux.AbstractExplicitLayer, T, U}
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, [t]))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 .+ (t .- f.t0) .* y
 end
@@ -143,8 +149,10 @@ end
 function (f::ODEPhi{C, T, U})(t::AbstractVector,
         θ) where {C <: Lux.AbstractExplicitLayer, T, U}
     # Batch via data as row vectors
-    y, st = f.chain(
-        adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st)
+    θ_ = ComponentArrays.getdata(θ.depvar)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    t_ = convert.(eltypeθ, adapt(typeθ, t'))
+    y, st = f.chain(t_, θ.depvar, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     f.u0 .+ (t' .- f.t0) .* y
 end
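All four `ODEPhi` methods feed the converted input through the same trial-solution ansatz, `u_θ(t) = u0 + (t - t0) * NN_θ(t)`, which satisfies the initial condition by construction for every parameter value. A self-contained sketch (the layer sizes are illustrative, not what the solver builds):

```julia
using Lux, Random, ComponentArrays

chain = Lux.Chain(Lux.Dense(1, 8, tanh), Lux.Dense(8, 1))
ps, st = Lux.setup(Random.default_rng(), chain)
ps = ComponentArray(ps)

u0, t0 = 1.0f0, 0.0f0
# Trial solution: network output rescaled so that u(t0) == u0 exactly
phi(t, θ) = u0 + (t - t0) * first(first(chain([t], θ, st)))

phi(t0, ps) == u0   # true for any θ: the network term vanishes at t0
```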
diff --git a/src/pinn_types.jl b/src/pinn_types.jl
index 59480d8a6..713e67e73 100644
--- a/src/pinn_types.jl
+++ b/src/pinn_types.jl
@@ -505,13 +505,19 @@ mutable struct Phi{C, S}
 end
 
 function (f::Phi{<:Lux.AbstractExplicitLayer})(x::Number, θ)
-    y, st = f.f(adapt(parameterless_type(ComponentArrays.getdata(θ)), [x]), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    x_ = convert.(eltypeθ, adapt(typeθ, [x]))
+    y, st = f.f(x_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     y
 end
 
 function (f::Phi{<:Lux.AbstractExplicitLayer})(x::AbstractArray, θ)
-    y, st = f.f(adapt(parameterless_type(ComponentArrays.getdata(θ)), x), θ, f.st)
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
+    x_ = convert.(eltypeθ, adapt(typeθ, x))
+    y, st = f.f(x_, θ, f.st)
     ChainRulesCore.@ignore_derivatives f.st = st
     y
 end
@@ -522,13 +528,14 @@ end
 
 # the method to calculate the derivative
 function numeric_derivative(phi, u, x, εs, order, θ)
-    _type = parameterless_type(ComponentArrays.getdata(θ))
+    θ_ = ComponentArrays.getdata(θ)
+    eltypeθ, typeθ = eltype(θ_), parameterless_type(θ_)
 
     ε = εs[order]
     _epsilon = inv(first(ε[ε .!= zero(ε)]))
 
-    ε = adapt(_type, ε)
-    x = adapt(_type, x)
+    ε = convert.(eltypeθ, adapt(typeθ, ε))
+    x = convert.(eltypeθ, adapt(typeθ, x))
 
     # any(x->x!=εs[1],εs) # εs is the epsilon for each order, if they are all the same then we use a fancy formula
diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl
index 7236ac041..155b300d8 100644
--- a/test/NNPDE_tests.jl
+++ b/test/NNPDE_tests.jl
@@ -110,17 +110,17 @@ end
         Lux.Chain(Lux.Dense(2, 12, Lux.tanh),
             Lux.Dense(12, 12, Lux.tanh),
             Lux.Dense(12, 1))]
-    grid_strategy = NeuralPDE.GridTraining(0.1)
-    quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(),
+    grid_strategy = GridTraining(0.1)
+    quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(),
         reltol = 1e-3, abstol = 1e-3,
         maxiters = 50, batch = 100)
 
-    discretization = NeuralPDE.PhysicsInformedNN(chain, grid_strategy)
+    discretization = PhysicsInformedNN(chain, grid_strategy)
 
     @named pde_system = PDESystem(eqs, bcs, domains, [x, y, z],
         [u(x, y, z), v(y, x), h(z), p(x, z)])
 
-    prob = NeuralPDE.discretize(pde_system, discretization)
+    prob = discretize(pde_system, discretization)
 
     callback = function (p, l)
         println("Current loss is: $l")
diff --git a/test/runtests.jl b/test/runtests.jl
index e6248eae6..a874e66e9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,14 @@
 using Pkg
 using SafeTestsets
 
+# Run Lux in "safe-mode" with aggressive error checking
+using Preferences
+
+# TODO: `warn` is for debugging purposes. Change to `error` for best performance.
+Preferences.set_preferences!("Lux", "eltype_mismatch_handling" => "warn")
+Preferences.set_preferences!("LuxLib", "instability_check" => "warn")
+Preferences.set_preferences!("LuxCore", "instability_check" => "warn")
+
 const GROUP = get(ENV, "GROUP", "All")
 
 const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
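These `set_preferences!` calls persist to a `LocalPreferences.toml` next to the active `Project.toml` (hence `Preferences`, `LuxCore`, and `LuxLib` joining the test target), and they take effect when the Lux packages are next precompiled. They can be read back with `load_preference`; a small sketch using the LuxLib UUID from the `[extras]` table above:

```julia
using Preferences, UUIDs

# load_preference also accepts a loaded Module in place of a UUID
luxlib = UUID("82251201-b29d-42c6-8e01-566dec8acb11")
load_preference(luxlib, "instability_check")   # -> "warn" once runtests.jl has run
```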