Couple KPP to UM using OASIS3-MCT

Changes to KPP


+++ Makefile    (working copy)
-FFLAGS=-fpp -xHost -O3 -r8 -I. -traceback -fp-model precise
+FFLAGS=-fpp -xHost -O3 -r8 -I. -traceback -fp-model precise -diag-disable 10010
 
-OASIS3_LIB=-lpsmile.MPI1 -lmpp_io
+OASIS3_LIB=-lpsmile.MPI1 -lmct -lmpeu -lscrip
 
+++ steves_3D_ocn.f     (working copy)
-      use mod_prism_proto
+      use mod_oasis
 
-      call prism_get_localcomm_proto(kpp_mpi_comm,mpierr)
+      call oasis_get_localcomm(kpp_mpi_comm,mpierr)
 
+++ init_oasis3.f       (working copy)
-      USE mod_kinds_model
-      USE mod_prism_proto
-      USE mod_prism_def_partition_proto
-      USE mod_prism_put_proto
-      USE mod_prism_get_proto
-      USE mod_prism_grids_writing
+      USE mod_oasis_kinds
+      USE mod_oasis
 
-      CALL prism_init_comp_proto(il_comp_id, cp_modnam, ierror)
-      IF (ierror .NE. PRISM_Ok) THEN
+      CALL oasis_init_comp(il_comp_id, cp_modnam, ierror)
+      IF (ierror .NE. OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_Init_Comp_Proto = ',ierror
-         CALL prism_abort_proto(il_comp_id,'KPP init_oasis3.f','abort1')
+     +        'OASIS_Init_Comp_Proto = ',ierror
+         CALL oasis_abort(il_comp_id,'KPP init_oasis3.f','abort1')
          ! Can/should we call MIXED_ABORT here as well?
       ELSE
-         WRITE(nuout,*) 'KPP: Successful call to PRISM_Init_Comp_Proto'
+         WRITE(nuout,*) 'KPP: Successful call to OASIS_Init_Comp_Proto'
       ENDIF
 
       ! Get local communicator
-      CALL prism_get_localcomm_proto(il_commlocal,ierror)
-      IF (ierror .NE. PRISM_Ok) THEN
+      CALL oasis_get_localcomm(il_commlocal,ierror)
+      IF (ierror .NE. OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_Get_LocalComm_Proto = ',ierror
+     +        'OASIS_Get_LocalComm_Proto = ',ierror
       ELSE
          WRITE(nuout,*) 'KPP: Successfully received local communicator'
       ENDIF
@@ -81,15 +81,15 @@
 
       ! Define the grids used by KPP (for master processor only)
       IF (il_rank .EQ. 0) THEN
-         CALL prism_start_grids_writing(il_flag)
+         CALL oasis_start_grids_writing(il_flag)
          IF (il_flag .EQ. 1) THEN
       ! Will we ever need to do this?  Do we need to support it?
             WRITE(nuout,*) 'KPP: il_flag=1, so we will write ',
-     +           'grids for PRISM'
-            CALL prism_terminate_grids_writing()
+     +           'grids for OASIS'
+            CALL oasis_terminate_grids_writing()
          ELSE
             WRITE(nuout,*) 'KPP: il_flag/=1, so we will not write ',
-     +           'grids for PRISM'
+     +           'grids for OASIS'
          ENDIF
       ENDIF
 
@@ -103,12 +103,12 @@
       il_paral ( clim_offset   ) = 0
       il_paral ( clim_length   ) = NX_GLOBE*NY_GLOBE
 
-      CALL prism_def_partition_proto(il_part_id,il_paral,ierror)
-      IF (ierror.NE.PRISM_Ok) THEN
+      CALL oasis_def_partition(il_part_id,il_paral,ierror)
+      IF (ierror.NE.OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_Def_Partition_Proto = ',ierror
+     +        'OASIS_Def_Partition_Proto = ',ierror
       ELSE
-         WRITE(nuout,*) 'KPP: Called PRISM_Def_Partition_Proto'
+         WRITE(nuout,*) 'KPP: Called OASIS_Def_Partition_Proto'
       ENDIF
 
 #ifdef TOYCLIM /* For the OASIS3 toy model - Exchange 1D fields */
@@ -136,15 +136,15 @@
       cl_writ(6)='SVNOCEAN'
 
       DO i=1,jpfldout
-         CALL prism_def_var_proto(il_var_id_out(i),cl_writ(i),
-     +        il_part_id,il_var_nodims,PRISM_Out,il_var_shape,
-     +        PRISM_Real,ierror)
-         IF (ierror.NE.PRISM_Ok) THEN
+         CALL oasis_def_var(il_var_id_out(i),cl_writ(i),
+     +        il_part_id,il_var_nodims,OASIS_Out,il_var_shape,
+     +        OASIS_Real,ierror)
+         IF (ierror.NE.OASIS_Ok) THEN
             WRITE(nuout,*) 'KPP: Received error from ',
-     +           'PRISM_Def_Var_Proto = ',ierror,'for variable ',
+     +           'OASIS_Def_Var_Proto = ',ierror,'for variable ',
      +           cl_writ(i),' (output field)'
          ELSE
-            WRITE(nuout,*) 'KPP: Called PRISM_Def_Var_Proto for ',
+            WRITE(nuout,*) 'KPP: Called OASIS_Def_Var_Proto for ',
      +           'variable ',cl_writ(i),' (output field)'
          ENDIF
       ENDDO
@@ -163,25 +163,25 @@
       cl_read(11)='TAUY'
 
       DO i=1,jpfldin
-         CALL prism_def_var_proto(il_var_id_in(i),cl_read(i),
-     +        il_part_id,il_var_nodims,PRISM_In,il_var_shape,
-     +        PRISM_Real,ierror)
-         IF (ierror.NE.PRISM_Ok) THEN
+         CALL oasis_def_var(il_var_id_in(i),cl_read(i),
+     +        il_part_id,il_var_nodims,OASIS_In,il_var_shape,
+     +        OASIS_Real,ierror)
+         IF (ierror.NE.OASIS_Ok) THEN
             WRITE(nuout,*) 'KPP: Received error from ',
-     +           'PRISM_Def_Var_Proto = ',ierror,'for variable',
+     +           'OASIS_Def_Var_Proto = ',ierror,'for variable',
      +           cl_read(i),' (input field)'
          ELSE
-            WRITE(nuout,*) 'KPP: Called PRISM_Def_Var_Proto for ',
+            WRITE(nuout,*) 'KPP: Called OASIS_Def_Var_Proto for ',
      +           'variable ',cl_read(i),' (input field)'
          ENDIF
       ENDDO
 
-      CALL prism_enddef_proto(ierror)
-      IF (ierror.NE.PRISM_Ok) THEN
+      CALL oasis_enddef(ierror)
+      IF (ierror.NE.OASIS_Ok) THEN
          WRITE(nuout,*) 'KPP: Received error from ',
-     +        'PRISM_enddef_proto = ',ierror
+     +        'OASIS_enddef = ',ierror
       ELSE
-         WRITE(nuout,*) 'KPP: Called PRISM_Enddef_Proto'
+         WRITE(nuout,*) 'KPP: Called OASIS_Enddef_Proto'
       ENDIF
 
       RETURN
 
 
+++ couple_io_oasis3.f  (working copy)
-      USE mod_kinds_model
-      USE mod_prism_proto
-      USE mod_prism_def_partition_proto
-      USE mod_prism_put_proto
-      USE mod_prism_get_proto
-      USE mod_prism_grids_writing
+      USE mod_oasis_kinds
+      USE mod_oasis
 
@@ -28,6 +28,7 @@
 #include <times.com>
 #include <constants.com>
 #include <initialcon.com>
+      include "mpif.h"
 c
 c     Output variables on the KPP regional grid - returned to
 c     the calling routine (usually <fluxes>).
@@ -64,15 +65,15 @@
-      call prism_get_localcomm_proto(kpp_mpi_comm, ierror)
+      call oasis_get_localcomm(kpp_mpi_comm, ierror)
       if (ierror .ne. 0) then
-        call prism_abort_proto(il_comp_id,
+        call oasis_abort(il_comp_id,
      +      'couple_io_oasis3.f',
      +      'getcomm')
       end if
       call MPI_Comm_rank(kpp_mpi_comm, kpp_mpi_rank, ierror)
       if (ierror .ne. 0) then
-        call prism_abort_proto(il_comp_id,'couple_io_oasis3.f','rank')
+        call oasis_abort(il_comp_id,'couple_io_oasis3.f','rank')
       end if
-            CALL prism_get_proto(il_var_id_in(i),
+            CALL oasis_get(il_var_id_in(i),
 
-            IF (ierror.NE.PRISM_Ok .and. ierror .LT. PRISM_Recvd) THEN
+            IF (ierror.NE.OASIS_Ok .and. ierror .LT. OASIS_Recvd) THEN
 
-     +              'PRISM_Get_Proto =',ierror,' receiving variable ',
+     +              'OASIS_Get_Proto =',ierror,' receiving variable ',
 
-               CALL prism_abort_proto(il_comp_id,'couple_io_oasis3.f',
+               CALL oasis_abort(il_comp_id,'couple_io_oasis3.f',
 
@@ -191,15 +192,15 @@
 
-      USE mod_kinds_model
-      USE mod_prism_proto
-      USE mod_prism_def_partition_proto
-      USE mod_prism_put_proto
-      USE mod_prism_get_proto
-      USE mod_prism_grids_writing
+      USE mod_oasis_kinds
+      USE mod_oasis
 
@@ -418,22 +419,22 @@
 
-     +     'KPP: Calling PRISM_Put_Proto for variable ' // cl_writ(i) )
-         CALL prism_put_proto(il_var_id_out(i),
+     +     'KPP: Calling OASIS_Put_Proto for variable ' // cl_writ(i) )
+         CALL oasis_put(il_var_id_out(i),
 
-         IF (ierror.NE.PRISM_Ok.and.ierror.LT.PRISM_Sent) THEN
+         IF (ierror.NE.OASIS_Ok.and.ierror.LT.OASIS_Sent) THEN
 
-     +        'KPP: Received error from PRISM_Put_Proto =',ierror )
+     +        'KPP: Received error from OASIS_Put_Proto =',ierror )
 
-            CALL prism_abort_proto(il_comp_id,'couple_io_oasis3.f',
+            CALL oasis_abort(il_comp_id,'couple_io_oasis3.f',
 
-     +       'KPP: Successfully called PRISM_Put_Proto for variable ' //
+     +       'KPP: Successfully called OASIS_Put_Proto for variable ' //
 
-      USE mod_kinds_model
-      USE mod_prism_proto
+      USE mod_oasis_kinds
+      USE mod_oasis
 
-     +     'Calling prism_terminate_proto(ierror)' )
-      CALL prism_terminate_proto(ierror)
+     +     'Calling oasis_terminate(ierror)' )
+      CALL oasis_terminate(ierror)
 
-     +     'Called prism_terminate_proto(ierror)' )
-      IF (ierror .NE. PRISM_Ok) THEN
+     +     'Called oasis_terminate(ierror)' )
+      IF (ierror .NE. OASIS_Ok) THEN
 
-     +        'PRISM_Terminate_Proto =',ierror,
+     +        'OASIS_Terminate_Proto =',ierror,
 

Changes to UM


The UM changes are mainly to the build configuration:

ummodel/cfg/bld.cfg
Add the dependency exclusion:
excl_dep                                               USE::mod_oasis_kinds
and, in tool::ldflags, remove -lmpp_io and add -lmct -lscrip -lmpeu

umrecon/cfg/bld.cfg
In tool::ldflags, remove -lmpp_io and add -lmct -lscrip -lmpeu

Current Crash


Runtime Crash:

NEMO_NPROC  CICE_NPROC
*****************************************************************
     Version 7.3 template, Unified Model ,  Non-Operational
     Created by UMUI version 7.3
*****************************************************************
PATH used = /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin:
                    /short/w35/hxw599/UM_ROUTDIR//vaapb/bin:/projects/access/umdir/vn7.3/normal/scripts:
                    /projects/access/umdir/vn7.3/normal/exec:/projects/access/umdir/fcm1.4/bin:
                    /projects/access/umdir/vn7.3/normal/utils:/projects/access/umdir/bin:
                    /projects/access/umdir/vn7.3/bin:/projects/access/umdir/umui2.0/bin:
                    /projects/access/bin:/projects/access/umdir/vn7.3/normal/runscripts:
                    /apps/openmpi/wrapper:/apps/openmpi/1.8.2/bin:/apps/x11vnc/0.9.13/bin:
                    /opt/bin:/bin:/usr/bin:/opt/pbs/default/bin:/projects/access/bin/:/home/599/hxw599/bin:.
*****************************************************************
     Job started at : Mon Nov  9 15:57:08 AEDT 2015
     Run started from UMUI
     Running from control files in /home/599/hxw599/umui_runs/vaapb-313155649
uamul (collab) - N48 KPP - sea ice
This job is running on machine r76,
using UM directory /projects/access/umdir,
***************************************************************
   Starting script :   qsexecute
   Starting time   :   Mon Nov  9 15:57:08 AEDT 2015
***************************************************************
 
KPP using 15 processors
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute: Executing setup
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qssetup: Job terminated normally
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute: Executing dump reconfiguration program
 
*********************************************************
RCF Executable : /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qxreconf
*********************************************************
 
 
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute: Executing model run
 
*********************************************************
UM Executable : /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/vaapb.exe
*********************************************************
 
 
No OASIS3 angles file will be used.
******************************************************
No existing rmp_* file directory specified
Any existing rmp_* files will be removed from
for safety
Generating rmp_* files at run time
NOTE: This will vastly increase your required run time
******************************************************
cp: cannot stat `/short/w35/hxw599/vaapb/kpp-scripts//namcouple': No such file or directory
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute[888]: /short/w35/hxw599/vaapb/kpp-scripts//kpp_run_pre.ksh: not found [No such file or directory]
readline() on closed filehandle F0 at /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/OASIS3_kpp line 60.
readline() on closed filehandle F1 at /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/OASIS3_kpp line 120.
seconds total = 0
oasis_init_comp: Calling MPI_Init
... (line repeated 48 times in total, once per UM core)
 
oasis_init_comp: Not Calling MPI_Init
... (line repeated 15 times in total, once per KPP core)
 
forrtl: error (78): process killed (SIGTERM)
... (snip 47 um sigterms and 15 kpp sigterms)
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsexecute[1165]: /short/w35/hxw599/vaapb/kpp-scripts//kpp_run_post.ksh: not found [No such file or directory]
*****************************************************************
   Ending script   :   qsexecute
   Completion code :   1
   Completion time :   Mon Nov  9 15:57:16 AEDT 2015
*****************************************************************
\n\n\n
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsmaster: Failed in qsexecute in model vaapb
*****************************************************************
   Starting script :   qsfinal
   Starting time   :   Mon Nov  9 15:57:16 AEDT 2015
*****************************************************************
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsfinal: Model vaapb - Error: No history files
*****************************************************************
   Ending script   :   qsfinal
   Completion code :   135
   Completion time :   Mon Nov  9 15:57:16 AEDT 2015
*****************************************************************
\n\n\n
 
/short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/bin/qsmaster: failed in final in model vaapb
 <<<< Information about How Many Lines of Output follow >>>>
 73  lines in main OUTPUT file.
 0 lines of O/P from pe0.
 <<<<         Lines of Output Information ends          >>>>
 
 ***   *   *  *****  ****   *   *  *****
*   *  *   *    *    *   *  *   *    *
*   *  *   *    *    *   *  *   *    *
*   *  *   *    *    *   *  *   *    *
*   *  *   *    *    ****   *   *    *
*   *  *   *    *    *      *   *    *
*   *  *   *    *    *      *   *    *
*   *  *   *    *    *      *   *    *
 ***    ***     *    *       ***     *
 
 
 
 
****    ***   *****         ***   *   *  *****  ****   *   *  *****
*   *  *   *  *            *   *  *   *    *    *   *  *   *    *
*   *  *      *            *   *  *   *    *    *   *  *   *    *
*   *  *      *            *   *  *   *    *    *   *  *   *    *
****   *      ****         *   *  *   *    *    ****   *   *    *
**     *      *            *   *  *   *    *    *      *   *    *
* *    *      *            *   *  *   *    *    *      *   *    *
*  *   *   *  *            *   *  *   *    *    *      *   *    *
*   *   ***   *             ***    ***     *    *       ***     *
 
 
qsexecute:  %RECONA% Atmosphere reconfiguration step
 
 
 =====================================================
 GCOM Version 3.3
 openmpi/1.6.5,intel-fc/12.1.9.293
 Using precision : 64bit INTEGERs and 64bit REALs
 Built at Thu Aug 29 20:23:42 EST 2013
 =====================================================
 
 
 Parallel Reconfiguration using                      1  processor(s)
 divided into a LPG with nproc_x=                     1 and nproc_y=
                     1
 
OPEN:  Claimed 32000512 Bytes (4000064 Words) for Buffering
OPEN:  Buffer Address is                   F6A41040
CLOSE: File /short/w48/bxp565/ancils/lsm_claudia Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/qrparm.soil Closed on Unit 12
CLOSE: File /short/w48/bxp565/ancils/lsm_claudia Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/qrparm.orog Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/cable_vegfrac_N48.anc Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/cable_vegfunc_N48.anc Closed on Unit 12
CLOSE: File /short/w48/bxp565/ancils/lfrac_claudia Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/qrparm.soil.dust Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/TRIP_riv_store_ancil2 Closed on Unit 12
CLOSE: File /projects/access/data/ancil/access-1.3/N48-cal360/riverrouting_access_v2 Closed on Unit 12
CLOSE: File /short/w35/hxw599/UM_ROUTDIR/hxw599/vaapb/vaapb.astart Closed on Unit 11
CLOSE: File /short/w48/dxd565/UM_ROUTDIR/dxd565/ualdd//um-dump.restart Closed on Unit 10
 
 
*   *  *     *         ***   *   *  *****  ****   *   *  *****
*   *  **   **        *   *  *   *    *    *   *  *   *    *
*   *  * * * *        *   *  *   *    *    *   *  *   *    *
*   *  *  *  *        *   *  *   *    *    *   *  *   *    *
*   *  *     *        *   *  *   *    *    ****   *   *    *
*   *  *     *        *   *  *   *    *    *      *   *    *
*   *  *     *        *   *  *   *    *    *      *   *    *
*   *  *     *        *   *  *   *    *    *      *   *    *
 ***   *     *         ***    ***     *    *       ***     *
 
 
USING KPP_PRERUN
 
qsexecute: %MODEL% output follows:-
 
UMMACHINE =  ALTIX
false
USING LINUXMPP
ACCESSRUNCMD  -n 48 ./um7.3x : -n 15 ./toyoce
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD
with errorcode 0.
 
NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.
--------------------------------------------------------------------------
USING KPP_POSTRUN
/short/w35/hxw599/vaapb/kpp-scripts//kpp_run_post.ksh exited with error code 127
0+1 records in
0+1 records out
3787 bytes (3.8 kB) copied, 0.000257916 s, 14.7 MB/s