Montura Consulting   Research & Development
 Building Clinical SDTM and ADaM Datasets

 

 


OVERVIEW

The following server-side SCL object is programmed to perform a series of operations according to CDISC specifications.

  • Compare column attributes, actual vs. expected
    1. column name
    2. column typecast
    3. column length.
  • Apply column labels
  • Apply dataset labels.
  • Create domain dataset with each column in the specified sequence
  • Provide error messages
    1. Absent columns
    2. Wrong typecast
    3. Wrong length.

MODIFY or COMPLETE RECONSTRUCTION ?

The goal is to create a foundation where software becomes more robust over time.

The problem is that programmers code logical steps as a "single procedure". In reality, there is no such thing as a logical step. The application is just one big tangled process that is too complex, or too ugly, for anyone except the original programmer to upgrade. Transferring the program to a new programmer ends up taking more time and effort than coding something new from scratch. That explains why it's cost-effective to throw out old code and start over.

While the following program is object-oriented, it's NOT modular.
It works as expected but is not easily understood.


SAS object source code

class component2;
   /*----------------------------------------------------------------------
     Monolithic validation object.

     Compares the column attributes (name, type, length) of the submission
     datasets found in the WORK library against the expected attributes
     loaded from the montura.cdisc SLIST entries, prints the comparison,
     and creates a labelled copy of every dataset that has no violations.

     Attributes
       workDataset   - names of WORK datasets still under consideration
       specTOC       - list filled from montura.cdisc.toc.slist
       specDataset   - list filled from montura.cdisc.dataset.slist
       messages      - system messages; a named item 'stop' halts the run
       activeMethods - method names executed, in order, by runInterface
   ----------------------------------------------------------------------*/
   public list workDataset / (sendEvent='N');
   public list specTOC / (sendEvent='N');
   public list specDataset / (sendEvent='N');
   public list messages / (sendEvent='N');
   public list activeMethods / (initialValue={
      'interface01',
      'interface02',
      'interface03',
      'interface04',
      'interface05',
      'interface06',
      'interface07',
      'interface08'
      });

   /* Run every active method in sequence until one of them posts a 'stop'
      message, then always clean up and write the messages to the LOG. */
   runInterface: method;
      dcl num xMethod;

      do xMethod=1 to listlen(activeMethods) while (nameditem(messages, 'stop')=0);
         call send(_self_, getitemc(activeMethods, xMethod));
      end;

      call send(_self_, 'cleanup');
      call send(_self_, 'systemMessages');
   endmethod;

   interface01: method / (description='Load Metadata');
      fillist('catalog', 'montura.cdisc.toc.slist', specTOC);
      fillist('catalog', 'montura.cdisc.dataset.slist', specDataset);
   endmethod;

   /* Collect the member name of every table in the WORK library. */
   interface02: method / (description='read workspace members');
      submit continue sql;
         create table temp as
         select *
         from sashelp.vstable
         where libname='WORK';
         quit;
      endsubmit;

      dcl num dset;
      dset=open('work.temp', 'i');
      do while (fetch(dset)=0);
         insertc(workDataset, getvarc(dset, varnum(dset, 'memname')), -1);
      end;
      close(dset);
   endmethod;

   /* Drop every WORK member that is not a named item of the TOC sublist.
      Posts a 'stop' message when nothing is left to validate. */
   interface03: method / (description='Identify submission datasets in WORK library');
      dcl num i;
      dcl list tocDSN=getiteml(specTOC);

      /* iterate backwards so deletions do not shift unvisited positions */
      do i=listlen(workDataset) to 1 by -1;
         if nameditem(tocDSN, getitemc(workDataset, i))=0 then do;
            delitem(workDataset, i);
         end;
      end;

      if listlen(workDataset)=0 then
         insertc(messages, 'Zero analysis dataset found in WORK library', -1, 'stop');
   endmethod;

   /* Build work.actual: one row per column of every retained dataset. */
   interface04: method / (description='assemble actual dataset attributes');
      dcl char thisDataset;
      dcl num i;

      do i=1 to listlen(workDataset);
         thisDataset=getitemc(workDataset, i);

         /* the ampersand reference below names the SCL variable thisDataset */
         submit continue sql;
            create table work.temp as
            select upcase(memname) as work_dataset,
                   upcase(name) as work_name,
                   upcase(type) as work_type,
                   length as work_length
            from sashelp.vcolumn
            where libname='WORK' and
                  memname='&thisDataset';
            quit;
         endsubmit;

         submit continue;
            proc append base=work.actual data=work.temp force;
            run;
         endsubmit;
      end;
   endmethod;

   /* Build work.expected: one row per specified column of every retained
      dataset, taken from the specDataset metadata. */
   interface05: method / (description='assemble expected attributes');
      dcl num xDataset xAttribute dset;
      dcl list attributes column;

      submit continue sql;
         create table work.expected (
            dataset varchar(32),
            name varchar(32),
            type varchar(4),
            length num
            );
         quit;
      endsubmit;

      do xDataset=1 to listlen(workDataset);
         attributes=getniteml(specDataset, getitemc(workDataset, xDataset));

         dset=open('work.expected', 'u');
         do xAttribute=1 to listlen(attributes);
            /* FIX: fetch the sublist of the CURRENT column. The original read it
               once, before the loop, while xAttribute was still uninitialised. */
            column=getiteml(attributes, xAttribute);

            call putvarc(dset, varnum(dset, 'dataset'), getitemc(workDataset, xDataset));
            call putvarc(dset, varnum(dset, 'name'), nameitem(attributes, xAttribute));
            call putvarc(dset, varnum(dset, 'type'), getnitemc(column, 'type'));
            call putvarn(dset, varnum(dset, 'length'), getnitemn(column, 'length'));
            append(dset, 'noinit');
         end;
         close(dset);
      end;
   endmethod;

   /* Join expected to actual attributes, classify each expected column
      (absent, wrong type, wrong length, or Pass) and print the result. */
   interface06: method / (description='compare actual vs. expected attributes');
      submit continue sql;
         create table compare as
         select a.*,
                b.work_name,
                b.work_type,
                b.work_length
         from work.expected a
         left join work.actual b
         on a.dataset=b.work_dataset and
            a.name=b.work_name
         order by a.dataset, a.name;
         quit;
      endsubmit;

      submit continue;
         data compare;
            length message $60;
            set compare;

            /* FIX: test for a non-blank name directly. The original test,
               length(work_name) GT 1, skipped one-character column names */
            if work_name='' then message='Absent Column';
            else do;
               if upcase(type) NE upcase(work_type) then do;
                  if upcase(type)='CHAR' then message='Typecast is NUM, expected CHAR';
                  if upcase(type)='NUM' then message='Typecast is CHAR, expected NUM';
               end;
               if message='' then do;
                  if length NE work_length then do;
                     message='Length is '||trim(left(work_length))||', expected '||left(length);
                  end;
               end;
            end;

            /* FIX: derive the flag BEFORE the blank message becomes Pass.
               The original set Pass first, so violation was always 1 */
            if message='' then violation=0;
            else violation=1;
            if message='' then message="Pass";

            keep message dataset name violation;
         run;

         proc print data=compare noobs label;
            by dataset;
            id dataset;
            var name message;
         run;
      endsubmit;
   endmethod;

   /* Summarise work.compare to one row per dataset holding the number of
      violating columns (0 means the dataset is clean). */
   interface07: method / (description='Identify datasets with zero errors');
      /* FIX: sum the 0/1 flag per dataset. The original count(violation) with no
         GROUP BY counted non-missing rows over the whole table. */
      submit continue sql;
         create table datasetAudit as
         select dataset,
                sum(violation) as violation
         from compare
         group by dataset;
         quit;
      endsubmit;
   endmethod;

   /* For every dataset without violations, generate and run a PROC SQL step
      that copies the specified columns, in specification order and with
      their labels, into the library named by the TOC 'category' item, then
      apply the dataset label with PROC DATASETS. */
   interface08: method / (description='Create SDTM/ADAM dataset');
      dcl num xAttribute violation;
      dcl char datasetName datasetLabel columnName columnLabel delimiter datasetCategory;
      dcl list attributes;
      dcl list datasetTOC=getiteml(specTOC);

      dcl num dset;
      dset=open('work.datasetAudit', 'i');
      do while (fetch(dset)=0);
         delimiter='';

         datasetName=getvarc(dset, varnum(dset, 'dataset'));
         datasetCategory=getnitemc(getniteml(datasetTOC, datasetName), 'category');
         datasetLabel=getnitemc(getniteml(datasetTOC, datasetName), 'label');
         violation= getvarn(dset, varnum(dset, 'violation'));
         attributes=getniteml(specDataset, datasetName);

         /* skip datasets that failed validation */
         if violation then continue;

         /* SUBMIT without CONTINUE only queues text; the statement is
            assembled piece by piece and executed by the next SUBMIT CONTINUE */
         submit;
            proc sql;
            create table &datasetCategory.&datasetName as
            select
         endsubmit;

         do xAttribute=1 to listlen(attributes);
            columnName=nameitem(attributes, xAttribute);
            columnLabel=getnitemc(getniteml(attributes, columnName), 'label');

            /* NOTE(review): a label containing an apostrophe would break the
               generated statement - confirm the metadata never contains one */
            submit;
               &delimiter &columnName label='&columnLabel'
            endsubmit;

            /* blank before the first column, comma before every later one */
            delimiter=',';
         end;

         submit continue;
            from &datasetName;
            quit;
         endsubmit;

         submit continue;
            proc datasets lib=&datasetCategory nolist nowarn;
            modify &datasetName (label='&datasetLabel');
            quit;
         endsubmit;
      end;
      close(dset);
   endmethod;

   cleanup: method / (description='Remove temporary work files');
      delete('work.temp');
      delete('work.actual');
      delete('work.expected');
      delete('work.compare');
      delete('work.datasetaudit');

      dellist(specTOC, 'Y');
      dellist(specDataset, 'Y');
   endmethod;

   systemMessages: method / (description='Send system messages to the LOG');
      dcl num i;
      dcl char thisMessage;

      put '-----------------------------------------------------------';
      put ' MPAR System Messages ';
      put '-----------------------------------------------------------';
      put ' ';

      do i=1 to listlen(messages);
         thisMessage=getitemc(messages, i);
         put thisMessage;
      end;
   endmethod;
endclass;

SAS Output Window and SAS LOG

 

 


SERIAL MODELS


REFACTORING, CREATING SERIAL MODELS

Refactoring means to chop big programs into smaller, logical steps. Serial Models execute modular steps in a predetermined sequence, while still allowing for on-the-fly alterations to that sequence. It's important to recognize that the scope of a single logical step may appear more logical for computers than for humans.

RRP Serial Models are composed of two types of objects.

  • One controller object
  • Any number of task objects


CONTROLLER OBJECT

Two objectives

  1. Remove the need for IMPORT and INCLUDE statements.
  2. Execute each object (SAS program) in sequence.

Terminology

Repository: a SAS catalog

Object: a SAS program

Metadata: permanent data stored as an SLIST entry in a SAS catalog, as a SAS dataset, etc.

Required Metadata

  • The name of each repository of objects.
  • The sequence of execution for each object.

 

 

 

 

 

TASK OBJECT(S)

A task is one logical step of the process.

Programming Practices

  • Do NOT use macro variables outside of Controller.
  • Do NOT instantiate another object.
  • Do NOT pass parameters to another object through method calls or events.
  • Do NOT directly invoke another object.

 

 


Use multiple Serial Model copies for speedy parallel processing or
for side-by-side permutation analysis

 

Controller Object source code

The controller object is simple in design. Its only job is to assemble and execute SAS objects.

class controller;
   /*----------------------------------------------------------------------
     Controller object: instantiates every CLASS entry found in the listed
     MONTURA catalogs and runs each instance's runInterface method in turn.

     Attributes
       application   - object list of the instantiated task objects
       activeMethods - method names executed, in order, by runInterface
                       (no initial value is declared here, so it must be
                       populated before runInterface is called)
       messages      - system messages
   ----------------------------------------------------------------------*/
   public list application / (sendEvent='N');
   public list activeMethods / (sendEvent='N');
   public list messages / (sendEvent='N');

   /* Run the active methods in order; stops as soon as ANY message exists. */
   runInterface: method;
      dcl num xMethod;

      do xMethod=1 to listlen(activeMethods) while (listlen(messages)=0);
         call send(_self_, getitemc(activeMethods, xMethod));
      end;
   endmethod;

   /* Find the CLASS entries in the application catalogs and append one
      instance of each to the application list. */
   interface01: method / (description='Load SAS objects');
      dcl num dset;
      dcl char arg1 arg2 arg3 arg4;
      dcl object thisProgram;

      /* FIX: the original read "submit continue sq;;", a typo for the SQL option */
      submit continue sql;
         create table work.objects as
         select *
         from sashelp.vcatalg
         where libname='MONTURA' and
               memname in('CDISC', 'CDISC_DATAEDITS', 'CDISC_DASHBOARD', 'CDISC_REPORTS') and
               objtype='CLASS';
         quit;
      endsubmit;

      dset=open('work.objects', 'i');
      do while (fetch(dset)=0);
         arg1=getvarc(dset, varnum(dset, 'libname'));
         arg2=getvarc(dset, varnum(dset, 'memname'));
         arg3=getvarc(dset, varnum(dset, 'objname'));
         arg4=getvarc(dset, varnum(dset, 'objtype'));

         /* trim() guards against blank padding in the fetched values so the
            four-level entry name contains no embedded blanks */
         thisProgram=instance(loadclass(trim(arg1)||'.'||trim(arg2)||'.'||trim(arg3)||'.'||trim(arg4)));
         inserto(application, thisProgram, -1);
      end;
      close(dset);
   endmethod;

   /* Run each loaded object until a 'stop' message has been posted. */
   interface02: method / (description='Execute each object');
      /* FIX: the original "dcl thisObject;" omitted the variable type */
      dcl object thisObject;
      dcl num xObject;

      do xObject=1 to listlen(application) while (nameditem(messages, 'stop')=0);
         thisObject=getitemo(application, xObject);
         call send(thisObject, 'runInterface');
      end;
   endmethod;
endclass;

 

Task Object #1 source code

The first logical step is to identify analysis datasets in the WORK library.

Handling Errors

When errors or certain data conditions are met, a task object issues a message that is available on a system-wide basis. For example, when an error is detected in component1:

  1. Component1 issues a "stop" message and terminates its own cycle.
  2. Controller reads the message and the application exits without crashing.
class component1;
   /*----------------------------------------------------------------------
     Task object #1: determine which members of the WORK library are
     analysis datasets, i.e. are named in the TOC metadata.
   ----------------------------------------------------------------------*/
   public list workDataset / (sendEvent='N');
   public list specDataset / (sendEvent='N');
   public list specTOC / (sendEvent='N');
   public list messages / (sendEvent='N');
   private list activeMethods / (initialValue={
      'interface01',
      'interface02',
      'interface03'
      });

   /* Dispatch each active method in order; a 'stop' message ends the cycle. */
   runInterface: method;
      dcl num step;
      dcl char methodName;

      do step=1 to listlen(activeMethods) while (nameditem(messages, 'stop')=0);
         methodName=getitemc(activeMethods, step);
         call send(_self_, methodName);
      end;
   endmethod;

   /* Fill both specification lists from their catalog SLIST entries. */
   interface01: method / (description='Load Metadata');
      fillist('catalog', 'montura.cdisc.toc.slist', specTOC);
      fillist('catalog', 'montura.cdisc.dataset.slist', specDataset);
   endmethod;

   /* Append the member name of every WORK table to workDataset. */
   interface02: method / (description='Load all datasets in specified library');
      dcl num tableId;
      dcl char memberName;

      submit continue sql;
         create table work.temp as
         select *
         from sashelp.vstable
         where libname='WORK';
         quit;
      endsubmit;

      tableId=open('work.temp', 'i');
      do while (fetch(tableId)=0);
         memberName=getvarc(tableId, varnum(tableId, 'memname'));
         insertc(workDataset, memberName, -1);
      end;
      close(tableId);
   endmethod;

   /* Remove every member that is not a named item of the TOC sublist and
      post a 'stop' message when the list ends up empty. */
   interface03: method / (description='Keep only analysis datasets');
      dcl num pos;
      dcl char candidate;
      dcl list tocDSN=getiteml(specTOC);

      /* walk from the tail so deletions never disturb unvisited positions */
      do pos=listlen(workDataset) to 1 by -1;
         candidate=getitemc(workDataset, pos);
         if nameditem(tocDSN, candidate)=0 then do;
            delitem(workDataset, pos);
         end;
      end;

      if listlen(workDataset)=0 then
         insertc(messages, 'Zero analysis dataset found', -1, 'stop');
   endmethod;
endclass;

 

Task Object #2 source code

The second logical step is to assemble ACTUAL analysis dataset attributes.
The third logical step will be to assemble EXPECTED attributes.

class component2;
   /*----------------------------------------------------------------------
     Task object #2: collect the actual column attributes of every dataset
     named in workDataset into work.actual.
   ----------------------------------------------------------------------*/
   public list workDataset / (sendEvent='N');
   public list specDataset / (sendEvent='N');
   public list specTOC / (sendEvent='N');
   public list messages / (sendEvent='N');
   private list activeMethods / (initialValue={
      'interface01',
      'interface02'
      });

   /* Dispatch each active method in order; a 'stop' message ends the cycle. */
   runInterface: method;
      dcl num step;
      dcl char methodName;

      do step=1 to listlen(activeMethods) while (nameditem(messages, 'stop')=0);
         methodName=getitemc(activeMethods, step);
         call send(_self_, methodName);
      end;
   endmethod;

   /* Fill both specification lists from their catalog SLIST entries. */
   interface01: method / (description='Load Metadata');
      fillist('catalog', 'montura.cdisc.toc.slist', specTOC);
      fillist('catalog', 'montura.cdisc.dataset.slist', specDataset);
   endmethod;

   /* For each listed dataset, extract its column name, type and length from
      sashelp.vcolumn into work.temp and append that to work.actual. */
   interface02: method / (description='assemble analysis dataset attributes');
      dcl num pos;
      dcl char thisDataset;

      do pos=1 to listlen(workDataset);
         thisDataset=getitemc(workDataset, pos);

         /* the ampersand reference below names the SCL variable thisDataset */
         submit continue sql;
            create table work.temp as
            select upcase(memname) as work_dataset,
                   upcase(name) as work_name,
                   upcase(type) as work_type,
                   length as work_length
            from sashelp.vcolumn
            where libname='WORK' and
                  memname='&thisDataset';
            quit;
         endsubmit;

         submit continue;
            proc append base=work.actual data=work.temp force;
            run;
         endsubmit;
      end;
   endmethod;
endclass;

SUMMARY

The complexity of each object remains very low.