CFD technical skills:

CAD geometry tool: Solidworks, CATIA, Unigraphics, AutoCAD, Salome, Blender.
CFD pre-processing tool: ICEM-CFD, snappyHexMesh, Pointwise, Salome, GiD, Gmsh.
CFD solver tool: OpenFOAM, FLUENT, CFX, STAR-CD, STAR-CCM+, Code_Saturne, Telemac-2D, Telemac-3D.
CFD post-processing tool: Paraview, Tecplot, EnSight, FieldView, VisIt, Maya, Blender, Matplotlib.
Earth system models (ESMs): CESM, WRF, ROMS, Regional Coupled CESM (RCESM).
Parallel programming: MPI, OpenMP, OpenACC and CUDA (GPUs), Vectorization (MICs).
High performance computing: Valgrind, Perf, IPM, VTune, TAU, TotalView.
Scientific computational languages: C++, C, Fortran, Matlab, Python, Julia.
Operating systems and Website programming: Linux, Windows, HTML/CSS/JavaScript, PHP, MySQL.

CFD modelling:

Multiphase flow modeling of Mixture, Euler-Euler and Eulerian-Lagrangian approaches.
Interface dynamics and phase change using VOF, Level Set and CLSVOF with heat transfer.
Discrete phase modeling with DPM (discrete particle model) and DEM (discrete element method).
Multi-physics simulations through FSI (fluid-structure interaction) and LBM (lattice Boltzmann method).
Hypersonic flows (shock wave capture) and rarefied flows (3D DSMC, direct simulation Monte Carlo).


Parallel Programming using MPI (spmd), OpenMP (spmt), OpenACC and CUDA (GPUs), vectorization (simd).

C++ object-oriented design (OOD) factory model for a CUDA GPU object-oriented programming (OOP) framework


// Array extents. cudaRun::init() allocates IMAX*JMAX*KMAX elements for each
// buffer, and the run() implementations pass these values to the kernels as
// imax, jmax and kmax.
const int IMAX{1000}, JMAX{1000}, KMAX{1000};

// Element type of the buf_f buffer; its members are elided in this listing.
struct fnode{...};
// Element type of the buf_macro buffer; its members are elided in this listing.
struct macronode{...};

// Abstract base class of the GPU run strategies (cudaNoshare, cudaShare,
// cudaStream). It owns the three managed-memory buffers that every strategy
// passes to its kernel, and declares the run() entry point the strategies
// implement.
class cudaRun {
public:
    // Public so that derived strategies can be constructed and so that the
    // object can be deleted through a cudaRun pointer.
    cudaRun() {}

    // Releases the buffers allocated by init(). cudaFree(nullptr) is a no-op,
    // so destroying an object on which init() was never called is safe.
    virtual ~cudaRun() {
        cudaFree(buf_isnode);
        cudaFree(buf_f);
        cudaFree(buf_macro);
    }

    // The object owns raw device allocations; copying it would free them twice.
    cudaRun(const cudaRun &) = delete;
    cudaRun &operator=(const cudaRun &) = delete;

    // Launches the strategy-specific kernel. Pure virtual: a pure specifier
    // cannot be combined with an in-class body.
    virtual void run() const = 0;

    // Allocates the three buffers with cudaMallocManaged, IMAX*JMAX*KMAX
    // elements each. sizeof() comes first so the whole product is evaluated in
    // size_t rather than int.
    // NOTE(review): the cudaMallocManaged return codes are not checked here;
    // the listing provides no error-reporting facility to route them to.
    void init() {
        cudaMallocManaged((void **) &buf_macro, sizeof(macronode) * IMAX * JMAX * KMAX);
        cudaMallocManaged((void **) &buf_f, sizeof(fnode) * IMAX * JMAX * KMAX);
        cudaMallocManaged((void **) &buf_isnode, sizeof(int) * IMAX * JMAX * KMAX);
    }

protected:
    // Buffers filled in by init() and handed to the kernels by run().
    macronode *buf_macro = nullptr;
    fnode *buf_f = nullptr;
    int *buf_isnode = nullptr;

    // Was spelled "3D_array"; an identifier cannot start with a digit.
    fnode array3D;
};

// Kernel launched by cudaNoshare::run(). It is defined at namespace scope
// because a __global__ function cannot be a class member. The kernel body is
// not shown in this listing.
__global__ void kernelNoshare(int *isnode, fnode *buf_f, macronode *buf_macro, srtpara para,
                              int imax, int jmax, int kmax) {
    // ... kernel body elided in the original listing ...
}

// Run strategy that launches kernelNoshare with a plain <<<grid, block>>>
// configuration (no dynamic shared memory argument, no explicit stream).
class cudaNoshare: public cudaRun {
public:
    // Launches the kernel over the buffers allocated by cudaRun::init().
    // grid, block and bufh_para are not declared in this listing.
    void run() const override {
        kernelNoshare<<<grid, block>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX);
    }
};

// Kernel launched by cudaShare::run(). It is defined at namespace scope
// because a __global__ function cannot be a class member. The kernel body is
// not shown in this listing.
// NOTE(review): presumably this variant stages data in __shared__ memory, as
// the class name suggests — confirm against the full kernel.
__global__ void kernelShare(int *isnode, fnode *buf_f, macronode *buf_macro, srtpara para,
                            int imax, int jmax, int kmax) {
    // ... kernel body elided in the original listing ...
}

// Run strategy that launches kernelShare with a plain <<<grid, block>>>
// configuration.
class cudaShare: public cudaRun {
public:
    // Launches the kernel over the buffers allocated by cudaRun::init().
    // The original read "run const()", which is not a valid declarator;
    // `override` makes the compiler verify the signature against cudaRun::run.
    // grid, block and bufh_para are not declared in this listing.
    void run() const override {
        kernelShare<<<grid, block>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX);
    }
};

// Kernel launched by cudaStream::run(). It is defined at namespace scope
// because a __global__ function cannot be a class member. The kernel body is
// not shown in this listing.
__global__ void kernelStream(int *isnode, fnode *buf_f, macronode *buf_macro, srtpara para,
                             int imax, int jmax, int kmax) {
    // ... kernel body elided in the original listing ...
}

// Run strategy that launches kernelStream on an explicit stream.
class cudaStream: public cudaRun {
public:
    // Launches the kernel with 0 bytes of dynamic shared memory on `stream`.
    // grid, block, stream and bufh_para are not declared in this listing.
    void run() const override {
        kernelStream<<<grid, block, 0, stream>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX);
    }
};

// Driver: picks one run strategy through the cudaRun interface, allocates its
// buffers, launches its kernel and cleans up. This mirrors the init()/run()
// sequence of the Fortran driver further down in this document.
int main(void) {
    cudaRun *gpuRun = new cudaNoshare();
    //cudaRun *gpuRun = new cudaShare();
    //cudaRun *gpuRun = new cudaStream();

    gpuRun->init();
    gpuRun->run();

    // Kernel launches are asynchronous; wait for completion before the
    // buffers are released.
    cudaDeviceSynchronize();

    delete gpuRun;
    return 0;
}


Fortran object-oriented design (OOD) factory model for a CUDA GPU object-oriented programming (OOP) framework

    ! Driver fragment: selects one concrete run strategy through a polymorphic
    ! class(cudaRun) pointer, then initialises the data and launches the run.
    use mod_cudaInit
    use mod_cudaRun
    ! The concrete strategy types are declared in their own modules.
    use mod_cudaNoshare
    use mod_cudaShare
    use mod_cudaStream
    implicit none
    type(cudaInit), pointer :: gpuInit
    type(cudaNoshare), pointer :: gpuNoshare
    type(cudaShare), pointer :: gpuShare
    type(cudaStream), pointer :: gpuStream

    class(cudaRun), pointer :: gpuRun

    ! The pointers must be associated before their type-bound procedures are
    ! called. Allocate gpuShare / gpuStream instead when switching strategy.
    allocate(gpuInit)
    allocate(gpuNoshare)

    ! Pointer assignment needs an object as its target, not a type name.
    gpuRun => gpuNoshare
    ! gpuRun => gpuShare
    ! gpuRun => gpuStream

    call gpuInit % init()
    call gpuRun % run()

    nullify(gpuRun)
    deallocate(gpuNoshare)
    deallocate(gpuInit)

! Declares the cudaInit type, whose type-bound procedure init performs the
! data initialisation. The body of the initialisation routine is not shown in
! this listing.
module mod_cudaInit
    use mod...
    implicit none
    type, public :: cudaInit
    contains
        ! Type-bound procedures go after "contains"; init is bound to the
        ! module subroutine data_init.
        procedure :: init => data_init
    end type cudaInit

contains

    ! Implementation behind cudaInit % init.
    ! this: the object the procedure is invoked on (passed-object dummy).
    subroutine data_init(this)
        class(cudaInit), intent(inout) :: this
    end subroutine data_init
end module mod_cudaInit

! Declares the abstract type cudaRun, the common interface of the run
! strategies. Each extension must supply its own run procedure.
module mod_cudaRun
    use mod_cudaInit
    implicit none
    type, abstract, public :: cudaRun
    contains
        ! Deferred binding: concrete extensions provide the implementation.
        procedure(abs_run), deferred :: run
    end type cudaRun
    abstract interface
        ! Signature every run implementation must match.
        subroutine abs_run(this)
            ! An interface body does not see host entities unless imported.
            import :: cudaRun
            class(cudaRun), intent(inout) :: this
        end subroutine abs_run
    end interface
end module mod_cudaRun

! Concrete run strategy cudaNoshare: extends cudaRun and binds run to
! no_share_run, which launches the device kernel defined in this module.
! The kernel body is not shown in this listing.
module mod_cudaNoshare
    use cudafor
    use mod_cudaInit
    use mod_cudaRun
    implicit none
    type, extends(cudaRun), public :: cudaNoshare
    contains
        procedure :: run => no_share_run
    end type cudaNoshare
    private :: no_share_run

contains

    ! Device kernel. Dummy arguments are declared Fortran-style; the original
    ! listing used C parameter syntax here.
    ! NOTE(review): fnode, macronode and srtpara are not declared in this
    ! listing — presumably they come from a module used elsewhere; confirm.
    ! NOTE(review): para and the extents are taken by value to mirror the C++
    ! kernel signature — confirm srtpara is eligible to be passed by value.
    attributes(global) subroutine kernel(isnode, buf_f, buf_macro, para, &
                                         imax, jmax, kmax)
        integer, value         :: imax, jmax, kmax
        type(srtpara), value   :: para
        integer, device        :: isnode(*)
        type(fnode), device    :: buf_f(*)
        type(macronode), device :: buf_macro(*)
    end subroutine kernel

    ! Implementation behind cudaNoshare % run: launches the kernel.
    ! this: the object the procedure is invoked on (passed-object dummy,
    ! required because the binding is not declared nopass).
    ! grid, block, the buffers and bufh_para are not declared in this listing.
    subroutine no_share_run(this)
        class(cudaNoshare), intent(inout) :: this
        call kernel<<<grid, block>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX)
    end subroutine no_share_run
end module mod_cudaNoshare

! Placeholder: this module is empty in this listing.
! NOTE(review): the driver declares a type(cudaShare) pointer, so presumably
! this module is meant to define that type as an extension of cudaRun,
! parallel to mod_cudaNoshare — confirm.
module mod_cudaShare
end module mod_cudaShare

! Placeholder: this module is empty in this listing.
! NOTE(review): the driver declares a type(cudaStream) pointer, so presumably
! this module is meant to define that type as an extension of cudaRun,
! parallel to mod_cudaNoshare — confirm.
module mod_cudaStream
end module mod_cudaStream


CFD: Lattice Boltzmann method for computational fluid dynamics with BGK and MRT collision models (C++).
MPI: Finite difference method for heat transfer with MPI-2 parallel I/O and MPI-3 neighborhood collectives (Fortran).
Hybrid OpenMP/OpenACC/MPI: Hybrid OpenMP/MPI with OpenMP-2,3,4 under MPI thread initialization; hybrid OpenACC/MPI with multiple GPUs and OpenMP (C).
Hybrid CUDA/MPI: Hybrid CUDA/MPI with non-shared and shared GPU memory usage (CUDA).
Python/OpenFOAM: Python extraction of the residuals from the OpenFOAM log output file and plotting of the residual curves (Python).