Zhi Shang Website

CFD technical skills:

CAD geometry tool: Solidworks, CATIA, Unigraphics, AutoCAD, Salome, Blender.

CFD pre-processing tool: ICEM-CFD, snappyHexMesh, Pointwise, Salome, GiD, Gmsh.

CFD solver tool: OpenFOAM, FLUENT, CFX, STAR-CD, STAR-CCM+, Code_Saturne, Telemac-2D, Telemac-3D.

CFD post-processing tool: Paraview, Tecplot, EnSight, FieldView, VisIt, Maya, Blender, Matplotlib.

Earth system models (ESMs): CESM, WRF, ROMS, Regional Coupled CESM (RCESM).

Parallel programming: MPI, OpenMP, OpenACC and CUDA (GPUs), Vectorization (MICs).

High performance computing: Valgrind, Perf, IPM, VTune, TAU, TotalView.

Scientific computational languages: C++, C, Fortran, Matlab, Python, Julia.

Operating systems and Website programming: Linux, Windows, HTML/CSS/JavaScript, PHP, MySQL.

CFD modelling:

Multiphase flow modeling of Mixture, Euler-Euler and Eulerian-Lagrangian approaches.

Interface dynamics and phase change using VOF, Level Set and CLSVOF with heat transfer.

Discrete phase modeling with DPM (discrete particle model) and DEM (discrete element method).

Multi-physics simulations through FSI (fluid-structure interaction) and LBM (lattice Boltzmann method).

Hypersonic (shock wave capture) and rarefied flows (3D DSMC-direct simulation Monte Carlo).

HPC:

Parallel programming using MPI (SPMD), OpenMP (SPMT), OpenACC and CUDA (GPUs), and vectorization (SIMD).

C++ object-oriented design (OOD) factory model for a CUDA GPU object-oriented programming (OOP) framework

#include<cstdlib>
#include<iostream>
#include<cuda.h>
#include<cuda_runtime.h>
#include<sys/time.h>

// Extents of the node grid in the i, j and k directions; their product is the
// element count of every buffer allocated in cudaRun::init().
constexpr int IMAX = 1000;
constexpr int JMAX = 1000;
constexpr int KMAX = 1000;

// Per-node record types used as element types of the buffers and as kernel
// parameter types below. Their member lists are elided ("...") in this listing.
struct fnode{...};
struct macronode{...};

class cudaRun {
public:
    cudaRun() {};
    virtual ~cudaRun() {};
    virtual void run() const = 0 {};
    void init(){
        cudaMallocManaged((void **) &buf_macro, IMAX*JMAX*KMAX*sizeof(macronode));
        cudaMallocManaged((void **) &buf_f, IMAX*JMAX*KMAX*sizeof(fnode));
        cudaMallocManaged((void **) &buf_isnode, IMAX*JMAX*KMAX*sizeof(int));
    }
private:
    fnode 3D_array;
}

class cudaNoshare: public cudaRun {
public:
    __global__ void kernel(int *isnode, fnode *buf_f, macronode *buf_macro, srtpara para, int imax, int
    jmax, int kmax) {
        ...
    }
    virtual void run() const{
        kernel<<<grid, block>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX);
        ...
    }
}

class cudaShare: public cudaRun {
public:
    __global__ void kernel(int *isnode, fnode *buf_f, macronode *buf_macro, srtpara para, int imax, int
                            jmax, int kmax) {
        ...
    }
    virtual void run const() {
        kernel<<<grid, block>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX);
        ...
    }
}

class cudaStream: public cudaRun {
public:
    __global__ void kernel(int *isnode, fnode *buf_f, macronode *buf_macro, srtpara para, int imax,
                           int jmax, int kmax){
        ...
    }
    virtual void run() const {
        kernel<<<grid, block, 0, stream>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX);
        ...
    }
}

int main(void) {
    cudaRun *gpuRun = new cudaNoshare();
    //cudaRun *gpuRun = new cudaShare();
    //cudaRun *gpuRun = new cudaStream();
    gpuRun->init();
    gpuRun->run();
    return 0;
}

Fortran object-oriented design (OOD) factory model for a CUDA GPU object-oriented programming (OOP) framework

! Driver program: allocates one object of every concrete run strategy, selects
! one of them through the polymorphic pointer gpuRun, initialises the data via
! gpuInit and then calls the selected strategy's run binding.
!
! NOTE(review): in this listing the modules appear after the program unit;
! most compilers need the modules compiled before the program that uses them.
PROGRAM MAIN
    use mod_cudaInit
    use mod_cudaRun
    ! The concrete strategy types are defined in their own modules.
    use mod_cudaNoshare
    use mod_cudaShare
    use mod_cudaStream
    implicit none
    type(cudaInit), pointer :: gpuInit
    type(cudaNoshare), pointer :: gpuNoshare
    type(cudaShare), pointer :: gpuShare
    type(cudaStream), pointer :: gpuStream

    class(cudaRun), pointer :: gpuRun

    ! gpuInit must be allocated before its init binding is called.
    allocate(gpuInit)
    allocate(gpuNoshare)
    allocate(gpuShare)
    allocate(gpuStream)

    ! Point at the allocated object (a variable), not at the type name.
    gpuRun => gpuNoshare
    ! gpuRun => gpuShare
    ! gpuRun => gpuStream

    call gpuInit % init()
    call gpuRun % run()

    ! gpuRun only aliases one of the objects below, so it is nullified rather
    ! than deallocated.
    nullify(gpuRun)
    deallocate(gpuInit)
    deallocate(gpuNoshare)
    deallocate(gpuShare)
    deallocate(gpuStream)
END PROGRAM MAIN

! Module providing the cudaInit type, whose init binding runs data_init.
! The used module name and the body of data_init are elided ("...") in this
! listing.
module mod_cudaInit
    use mod...
    implicit none
    type, public :: cudaInit
    contains
        ! A non-deferred binding names its implementation with "=>"; the
        ! procedure(interface) form is only valid for deferred bindings.
        procedure :: init => data_init
    end type cudaInit

contains
    ! Implementation behind cudaInit%init. "this" is the passed-object dummy
    ! argument; its declaration is part of the elided body.
    subroutine data_init(this)
    ...
    end subroutine data_init
end module mod_cudaInit

! Module defining the abstract base type cudaRun. Types that extend it must
! supply the deferred binding "run", whose interface is abs_run.
module mod_cudaRun
    use mod_cudaInit
    implicit none
    type, abstract, public :: cudaRun
        contains
        ! Deferred: no implementation here, only the interface abs_run.
        procedure(abs_run), deferred :: run
    end type cudaRun
    !-----------------------
    ! Interface that every implementation of "run" has to match. The
    ! declaration of "this" is elided ("...") in this listing.
    abstract interface
        subroutine abs_run(this)
        ...
        end subroutine abs_run
    end interface
end module mod_cudaRun

! Module defining cudaNoshare, the extension of cudaRun whose run binding
! launches "kernel" with a plain <<<grid, block>>> configuration.
!
! NOTE(review): grid, block, bufh_para, the buf_* arrays and IMAX/JMAX/KMAX
! are not declared in this module; they are assumed to come from one of the
! used modules -- confirm.
module mod_cudaNoshare
    use cudafor
    use mod_cudaInit
    use mod_cudaRun
    implicit none
    type, extends(cudaRun), public :: cudaNoshare
    contains
        procedure :: run => no_share_run
    end type cudaNoshare
    !===============
    private :: no_share_run
contains
    ! Device kernel. Fortran dummy arguments are listed by name only and
    ! declared in the body.
    attributes(global) subroutine kernel(isnode, buf_f, buf_macro, para, &
                                         imax, jmax, kmax)
        ! NOTE(review): fnode, macronode and srtpara are not defined anywhere
        ! in this listing; they are assumed to be derived types provided by a
        ! used module -- confirm.
        integer :: isnode(*)
        type(fnode) :: buf_f(*)
        type(macronode) :: buf_macro(*)
        ! Scalars handed over from host code are passed by value.
        type(srtpara), value :: para
        integer, value :: imax, jmax, kmax
    ...
    end subroutine kernel

    ! Implementation of the run binding.
    ! NOTE(review): the declaration of "this" must agree with the abs_run
    ! interface in mod_cudaRun, whose body is elided -- add the matching
    ! intent once that interface is known.
    subroutine no_share_run(this)
        class(cudaNoshare) :: this
        call kernel<<<grid, block>>>(buf_isnode, buf_f, buf_macro, bufh_para, IMAX, JMAX, KMAX)
    end subroutine no_share_run
end module mod_cudaNoshare

! Placeholder: the module body is elided ("...") in this listing.
! NOTE(review): PROGRAM MAIN declares a pointer of type cudaShare, which is
! presumably defined here -- confirm.
module mod_cudaShare
    ...
end module mod_cudaShare

! Placeholder: the module body is elided ("...") in this listing.
! NOTE(review): PROGRAM MAIN declares a pointer of type cudaStream, which is
! presumably defined here -- confirm.
module mod_cudaStream
    ...
end module mod_cudaStream

GitHub:

CFD	Lattice Boltzmann method for computational fluid dynamics with BGK and MRT collision model	C++	https://github.com/zhishang80/OpenLBM
MPI	Finite difference method for heat transfer with MPI-2 parallel I/O and MPI-3 neighborhood collectives	Fortran	https://github.com/zhishang80/HeatTransfer
Hybrid OpenMP/OpenACC/MPI	Hybrid OpenMP/MPI with OpenMP-2,3,4 under MPI thread initializing; Hybrid OpenACC/MPI with multiple GPUs and OpenMP	C	https://github.com/zhishang80/MatrixMulMatrix_MPI
Hybrid Cuda/MPI	Hybrid Cuda/MPI with non-shared and shared GPU memory usage	Cuda	https://github.com/zhishang80/MatrixMulMatrix_Cuda_MPI
Python/OpenFOAM	Python picking up the residuals from OpenFOAM log output file and drawing the residual curves	Python	https://github.com/zhishang80/Python_OpenFOAM_process