PROC SQL:  A fast, efficient, and versatile, subsetable, formatable, sortable, case definable Data Step.

/* creating table (a data set) with specific variables vs. all variables; Examples taken from different references stated above*/

PROC SQL;

            CREATE TABLE PATIENT_DATA AS

                        SELECT PATIENT, DATE, BPS, BPD, DOB, GENDER, Date_of_last_visit

                                    FROM VITAL_RECS;

QUIT;

PROC SQL;

            CREATE TABLE PATIENT_DATA AS

                        SELECT *

                                    FROM VITAL_RECS (DROP=TEMP);  /*DATASET OPTIONS ARE USABLE */

QUIT;

/* creating table with deduped data on specific variables */

PROC SQL;

            CREATE TABLE PATIDS AS

                        SELECT DISTINCT PATIENT

                                    FROM VITAL_RECS;

QUIT;

/* creating table with sorted data based on specific variables */

PROC SQL;

                        CREATE TABLE VISITS AS

SELECT PATIENT_ID, DOB, BPS, BPD,Date_of_last_visit         

PROC SQL:  A fast, efficient, and versatile, subsetable, formatable, sortable, case definable Data Step.

 /* creating table (a data set) with specific variables vs. all variables */

 

PROC SQL;

            CREATE TABLE PATIENT_DATA AS

                        SELECT PATIENT, DATE, BPS, BPD, DOB, GENDER, Date_of_last_visit

                                    FROM VITAL_RECS;

QUIT;

 PROC SQL;

            CREATE TABLE PATIENT_DATA AS

                        SELECT *

                                    FROM VITAL_RECS (DROP=TEMP);  /*DATASET OPTIONS ARE USABLE */

QUIT;

 /* creating table with deduped data on specific variables */

 PROC SQL;

            CREATE TABLE PATIDS AS

                        SELECT DISTINCT PATIENT

                                    FROM VITAL_RECS;

QUIT;

 /* creating table with sorted data based on specific variables */

 PROC SQL;

                        CREATE TABLE VISITS AS

SELECT PATIENT_ID, DOB, BPS, BPD,Date_of_last_visit

FROM VITAL_RECS

ORDER BY PATIENT_ID, DATE DESCENDING; /* SORT */;

QUIT;

 /* formatting and using functions while creating the table

Note that the function is over the whole data values of the column*/

 PROC SQL;

             CREATE TABE MEAN_BP AS

            SELECT          COUNT(*) AS N,

ROUND(MEAN(BPS), 0.01) FORMAT=6.2 AS BPSMEAN, /* FUNCTIONS USAGE */

                                    ROUND(MEAN(BPD),0.01) FORMAT=6.2 AS BPDMEAN         

FROM VITAL_RECS;

 QUIT;

 PROC SQL;

CREATE TABLE HIGH_BP AS

             SELECT          PATIENT,

COUNT(PATIENT) AS N,

                                    DATE FORMAT=DATE7.,

                                    MAX(BPD) AS BPDHIGH,

                                    ROUND(BPD/CALCULATED BPDHIGH)*100,0.01) FORMAT=6.2 AS BPD_PCT_DIFF

             FROM             VITAL_RECS

             GROUP           BY PATIENT_ID                     

QUIT;

 /* A most powerful SQL code */

 PROC SQL;

             CREATE TABLE HIGH_BP AS

             SELECT          PATIENT,

COUNT(PATIENT) AS N,

DATE FORMAT=DATE7.,

MAX(BPD) AS BPD_HIGH

             FROM             VITAL_RECS

             WHERE           PATIENT_ID IN (101 102 103 105)  /* SEE NO COMMAS */

             GROUP BY    PATIENT_ID

             HAVING        BPD=CALCULATED BPD_HIGH

             ORDER           BY CALCULATED BPD_HIGH;

 QUIT;

/* output */

PATIENT_ID N DATE BPDHIGH

103     4 14AUG01 89

101     3 01JUN01 92

102     3 06AUG01 97

/* end */

PROC SQL;         

            CREATE         TABLE TEST_MED AS

            SELECT          PATIENT_ID

            CASE              CASE((PATIENT/2=INT(PATIENT/2)+(PATIENT=.))

                                    WHEN 1 THEN ‘MED A’ WHEN 0 THEN ‘MED B’ ELSE ‘ERROR’

                                    END AS DOSEGRP LENGTH=5

            FROM             VITAL_RECS

             ORDER BY     PATIENT_ID;

 QUIT;

PROC SQL;

            CREATE TABLE LEFT AS

             SELECT          A.PATIENT,

                                    A.DATE FORMAT=DATE7. AS DATE,

                                    A.PULSE

                                    B.MED

                                    B.DOSES

                                    B.AMT FORMAT=4.1

            FROM             VITALS A LEFT JOIN DOSING B

                                    ON      (A.PATIENT=B.PATIENT) AND A.DATE=B.DATE

            ORDER BY     PETIENT, DATE;

QUIT;

PROC SQL NOPRINT;           

            SELECT          DISTINCT PATIENT             INTO: PAT1 - : PAT999

            FROM             VITALS

            ORDER BY     PATIENT;

QUIT;

 PROC SQL NOPRINT;

             SELECT          DISTINCT PATIENT             INTO: PATLIST          SEPARATED BY ‘,’

            FROM             VITALS

            ORDER BY     PATIENT;

QUIT;

 

PROC SQL NOPRINT;

            SELECT          SUM(DRUG=1), SUM(DRUG=2), SUM(1 <=DRUG<=2)    INTO:  N1, :N2, :N3

            FROM             ANALY

            WHERE           GENDER=’MALE’;

QUIT;

 PROC SQL;  

CREATE TABLE set AS

SELECT * FROM one

OUTER UNION     /*  SIMILAR TO SET A B; */

SELECT * FROM two;

QUIT;

 

PROC SQL;

CREATE TABLE Both AS

SELECT a.*, b.*

FROM one AS a FULL JOIN 

two AS b

ON a.patient=b.patient;

QUIT;

 

PROC SQL;

CREATE TABLE ifa1 AS

SELECT a.*,

b.treatment

FROM one AS a LEFT JOIN  (/* SIMILAR TO “IF A” IN MERGE */)

two AS b

ON a.patient=b.patient;

QUIT;

 

/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;

 

PROC SQL SYNTAX

PROC SQL;

CREATE TABLE ifb1 AS

SELECT     a.age,

a.sex,

b.*

FROM one AS a RIGHT JOIN

two AS b

QUIT;

/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;

PROC SQL;

CREATE TABLE Both AS

SELECT a.*, b.*

FROM one AS a INNER JOIN

two AS b

ON a.patient=b.patient;

QUIT;

 

The most powerful SQL use is for counts and creating macro variables. The syntax is similar for both so I’ll show the macro variable code creation. This small snippet of code has proven invaluable to me and I’ll never go back to using DATA step for this.

 

PROC SQL NOPRINT;

SELECT TRIM(LEFT(PUT(COUNT(DISTINCT patient),8.))) INTO : trt1 - : trt3

FROM two

GROUP BY treatment;

QUIT;

%PUT trt1=&trt1. trt2=&trt2. trt3=&trt3.;

Log Output

trt1=2 trt2=1 trt3=4

Another favourite tip when using SQL has been to produce the macro variables in a dataset table, and store the view table in a permanent directory, so I can view the numbers whenever needed. To do this, use the following code:

PROC SQL NOPRINT;

CREATE TABLE kir.macro_vars AS

SELECT name, value

FROM sashelp.vmacro

WHERE scope='GLOBAL' AND name IN ('TRT1' 'TRT2' 'TRT3')

ORDER BY 1;

QUIT;

 One may open kir.macro_vars to check the macro vars and their values

 FROM VITAL_RECS                                              

ORDER BY PATIENT_ID, DATE DESCENDING; /* SORT */;

QUIT;

 /* formatting and using functions while creating the table

Note that the function is over the whole data values of the column*/

 PROC SQL;

             CREATE TABE MEAN_BP AS

 SELECT          COUNT(*) AS N,

ROUND(MEAN(BPS), 0.01) FORMAT=6.2 AS BPSMEAN, /* FUNCTIONS USAGE */

                                    ROUND(MEAN(BPD),0.01) FORMAT=6.2 AS BPDMEAN

           

FROM VITAL_RECS;

 

QUIT;

PROC SQL;           

CREATE TABLE HIGH_BP AS

            SELECT          PATIENT,

COUNT(PATIENT) AS N,

                                    DATE FORMAT=DATE7.,

                                    MAX(BPD) AS BPDHIGH,

                                    ROUND(BPD/CALCULATED BPDHIGH)*100,0.01) FORMAT=6.2 AS BPD_PCT_DIFF

            FROM             VITAL_RECS

            GROUP           BY PATIENT_ID

QUIT;

/* A most powerful SQL code - so far*/

PROC SQL;

            CREATE TABLE HIGH_BP AS

            SELECT          PATIENT,

COUNT(PATIENT) AS N,

DATE FORMAT=DATE7.,

MAX(BPD) AS BPD_HIGH

 

            FROM             VITAL_RECS

 

            WHERE           PATIENT_ID IN (101 102 103 105)  /* SEE NO COMMAS */

 

            GROUP BY    PATIENT_ID

 

            HAVING        BPD=CALCULATED BPD_HIGH

 

            ORDER           BY CALCULATED BPD_HIGH;

 

QUIT;

/* output */

PATIENT_ID N DATE BPDHIGH

103     4 14AUG01 89

101     3 01JUN01 92

102     3 06AUG01 97

/* end */

PROC SQL;

            CREATE         TABLE TEST_MED AS

            SELECT          PATIENT_ID

            CASE              CASE((PATIENT/2=INT(PATIENT/2)+(PATIENT=.))

                                    WHEN 1 THEN ‘MED A’ WHEN 0 THEN ‘MED B’ ELSE ‘ERROR’

                                    END AS DOSEGRP LENGTH=5

            FROM             VITAL_RECS

            ORDER BY     PATIENT_ID;

QUIT;

PROC SQL;

            CREATE TABLE LEFT AS

            SELECT          A.PATIENT,

                                    A.DATE FORMAT=DATE7. AS DATE,

                                    A.PULSE

                                    B.MED

                                    B.DOSES

                                    B.AMT FORMAT=4.1

 

            FROM             VITALS A LEFT JOIN DOSING B

                                    ON      (A.PATIENT=B.PATIENT) AND A.DATE=B.DATE

 

            ORDER BY     PETIENT, DATE;

 QUIT;

 

PROC SQL NOPRINT;

           

            SELECT          DISTINCT PATIENT             INTO: PAT1 - : PAT999

           

            FROM             VITALS

 

            ORDER BY     PATIENT;

 

QUIT;

PROC SQL NOPRINT;

            SELECT          DISTINCT PATIENT             INTO: PATLIST          SEPARATED BY ‘,’

            FROM             VITALS

            ORDER BY     PATIENT;

QUIT;

PROC SQL NOPRINT;

            SELECT          SUM(DRUG=1), SUM(DRUG=2), SUM(1 <=DRUG<=2)    INTO:  N1, :N2, :N3

            FROM             ANALY

            WHERE           GENDER=’MALE’;

QUIT;

PROC SQL;  

CREATE TABLE set AS

SELECT * FROM one

OUTER UNION     /*  SIMILAR TO SET A B; */

SELECT * FROM two;

QUIT;

 

PROC SQL;

CREATE TABLE Both AS

SELECT a.*, b.*

FROM one AS a FULL JOIN 

two AS b

ON a.patient=b.patient;

QUIT;

PROC SQL;

CREATE TABLE ifa1 AS

SELECT a.*,

b.treatment

FROM one AS a LEFT JOIN  (/* SIMILAR TO “IF A” IN MERGE */)

two AS b

ON a.patient=b.patient;

QUIT;

 

/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;

 

PROC SQL SYNTAX

PROC SQL;

CREATE TABLE ifb1 AS

SELECT     a.age,

a.sex,

b.*

FROM one AS a RIGHT JOIN

two AS b

QUIT;

/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;

PROC SQL;

CREATE TABLE Both AS

SELECT a.*, b.*

FROM one AS a INNER JOIN

two AS b

ON a.patient=b.patient;

QUIT;

 

The most powerful SQL use is for counts and creating macro variables. The syntax is similar for both so I’ll show the macro variable code creation. This small snippet of code has proven invaluable to me and I’ll never go back to using DATA step for this.

 

PROC SQL NOPRINT;

SELECT TRIM(LEFT(PUT(COUNT(DISTINCT patient),8.))) INTO : trt1 - : trt3

FROM two

GROUP BY treatment;

QUIT;

%PUT trt1=&trt1. trt2=&trt2. trt3=&trt3.;

Log Output

trt1=2 trt2=1 trt3=4

 

Another favourite tip when using SQL has been to produce the macro variables in a dataset table, and store the view table in a permanent directory, so I can view the numbers whenever needed. To do this, use the following code:

 

PROC SQL NOPRINT;

CREATE TABLE kir.macro_vars AS

SELECT name, value

FROM sashelp.vmacro

WHERE scope='GLOBAL' AND name IN ('TRT1' 'TRT2' 'TRT3')

ORDER BY 1;

QUIT;

 

One may open kir.macro_vars to check the macro vars and their values