PROC SQL: A fast, efficient, and versatile, subsetable, formatable, sortable, case definable Data Step.
/* creating table (a data set) with specific variables vs. all variables; Examples taken from different references stated above*/
PROC SQL;
CREATE TABLE PATIENT_DATA AS
SELECT PATIENT, DATE, BPS, BPD, DOB, GENDER, Date_of_last_visit
FROM VITAL_RECS;
QUIT;
PROC SQL;
CREATE TABLE PATIENT_DATA AS
SELECT *
FROM VITAL_RECS (DROP=TEMP); /*DATASET OPTIONS ARE USABLE */
QUIT;
/* creating table with deduped data on specific variables */
PROC SQL;
CREATE TABLE PATIDS AS
SELECT DISTINCT PATIENT
FROM VITAL_RECS;
QUIT;
/* creating table with sorted data based on specific variables */
PROC SQL;
CREATE TABLE VISITS AS
SELECT PATIENT_ID, DOB, BPS, BPD,Date_of_last_visit
PROC SQL: A fast, efficient, and versatile, subsetable, formatable, sortable, case definable Data Step.
/* creating table (a data set) with specific variables vs. all variables */
PROC SQL;
CREATE TABLE PATIENT_DATA AS
SELECT PATIENT, DATE, BPS, BPD, DOB, GENDER, Date_of_last_visit
FROM VITAL_RECS;
QUIT;
PROC SQL;
CREATE TABLE PATIENT_DATA AS
SELECT *
FROM VITAL_RECS (DROP=TEMP); /*DATASET OPTIONS ARE USABLE */
QUIT;
/* creating table with deduped data on specific variables */
PROC SQL;
CREATE TABLE PATIDS AS
SELECT DISTINCT PATIENT
FROM VITAL_RECS;
QUIT;
/* creating table with sorted data based on specific variables */
PROC SQL;
CREATE TABLE VISITS AS
SELECT PATIENT_ID, DOB, BPS, BPD,Date_of_last_visit
FROM VITAL_RECS
ORDER BY PATIENT_ID, DATE DESCENDING; /* SORT */;
QUIT;
/* formatting and using functions while creating the table
Note that the function is over the whole data values of the column*/
PROC SQL;
CREATE TABE MEAN_BP AS
SELECT COUNT(*) AS N,
ROUND(MEAN(BPS), 0.01) FORMAT=6.2 AS BPSMEAN, /* FUNCTIONS USAGE */
ROUND(MEAN(BPD),0.01) FORMAT=6.2 AS BPDMEAN
FROM VITAL_RECS;
QUIT;
PROC SQL;
CREATE TABLE HIGH_BP AS
SELECT PATIENT,
COUNT(PATIENT) AS N,
DATE FORMAT=DATE7.,
MAX(BPD) AS BPDHIGH,
ROUND(BPD/CALCULATED BPDHIGH)*100,0.01) FORMAT=6.2 AS BPD_PCT_DIFF
FROM VITAL_RECS
GROUP BY PATIENT_ID
QUIT;
/* A most powerful SQL code */
PROC SQL;
CREATE TABLE HIGH_BP AS
SELECT PATIENT,
COUNT(PATIENT) AS N,
DATE FORMAT=DATE7.,
MAX(BPD) AS BPD_HIGH
FROM VITAL_RECS
WHERE PATIENT_ID IN (101 102 103 105) /* SEE NO COMMAS */
GROUP BY PATIENT_ID
HAVING BPD=CALCULATED BPD_HIGH
ORDER BY CALCULATED BPD_HIGH;
QUIT;
/* output */
PATIENT_ID N DATE BPDHIGH
103 4 14AUG01 89
101 3 01JUN01 92
102 3 06AUG01 97
/* end */
PROC SQL;
CREATE TABLE TEST_MED AS
SELECT PATIENT_ID
CASE CASE((PATIENT/2=INT(PATIENT/2)+(PATIENT=.))
WHEN 1 THEN ‘MED A’ WHEN 0 THEN ‘MED B’ ELSE ‘ERROR’
END AS DOSEGRP LENGTH=5
FROM VITAL_RECS
ORDER BY PATIENT_ID;
QUIT;
PROC SQL;
CREATE TABLE LEFT AS
SELECT A.PATIENT,
A.DATE FORMAT=DATE7. AS DATE,
A.PULSE
B.MED
B.DOSES
B.AMT FORMAT=4.1
FROM VITALS A LEFT JOIN DOSING B
ON (A.PATIENT=B.PATIENT) AND A.DATE=B.DATE
ORDER BY PETIENT, DATE;
QUIT;
PROC SQL NOPRINT;
SELECT DISTINCT PATIENT INTO: PAT1 - : PAT999
FROM VITALS
ORDER BY PATIENT;
QUIT;
PROC SQL NOPRINT;
SELECT DISTINCT PATIENT INTO: PATLIST SEPARATED BY ‘,’
FROM VITALS
ORDER BY PATIENT;
QUIT;
PROC SQL NOPRINT;
SELECT SUM(DRUG=1), SUM(DRUG=2), SUM(1 <=DRUG<=2) INTO: N1, :N2, :N3
FROM ANALY
WHERE GENDER=’MALE’;
QUIT;
PROC SQL;
CREATE TABLE set AS
SELECT * FROM one
OUTER UNION /* SIMILAR TO SET A B; */
SELECT * FROM two;
QUIT;
PROC SQL;
CREATE TABLE Both AS
SELECT a.*, b.*
FROM one AS a FULL JOIN
two AS b
ON a.patient=b.patient;
QUIT;
PROC SQL;
CREATE TABLE ifa1 AS
SELECT a.*,
b.treatment
FROM one AS a LEFT JOIN (/* SIMILAR TO “IF A” IN MERGE */)
two AS b
ON a.patient=b.patient;
QUIT;
/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;
PROC SQL SYNTAX
PROC SQL;
CREATE TABLE ifb1 AS
SELECT a.age,
a.sex,
b.*
FROM one AS a RIGHT JOIN
two AS b
QUIT;
/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;
PROC SQL;
CREATE TABLE Both AS
SELECT a.*, b.*
FROM one AS a INNER JOIN
two AS b
ON a.patient=b.patient;
QUIT;
The most powerful SQL use is for counts and creating macro variables. The syntax is similar for both so I’ll show the macro variable code creation. This small snippet of code has proven invaluable to me and I’ll never go back to using DATA step for this.
PROC SQL NOPRINT;
SELECT TRIM(LEFT(PUT(COUNT(DISTINCT patient),8.))) INTO : trt1 - : trt3
FROM two
GROUP BY treatment;
QUIT;
%PUT trt1=&trt1. trt2=&trt2. trt3=&trt3.;
Log Output
trt1=2 trt2=1 trt3=4
Another favourite tip when using SQL has been to produce the macro variables in a dataset table, and store the view table in a permanent directory, so I can view the numbers whenever needed. To do this, use the following code:
PROC SQL NOPRINT;
CREATE TABLE kir.macro_vars AS
SELECT name, value
FROM sashelp.vmacro
WHERE scope='GLOBAL' AND name IN ('TRT1' 'TRT2' 'TRT3')
ORDER BY 1;
QUIT;
One may open kir.macro_vars to check the macro vars and their values
FROM VITAL_RECS
ORDER BY PATIENT_ID, DATE DESCENDING; /* SORT */;
QUIT;
/* formatting and using functions while creating the table
Note that the function is over the whole data values of the column*/
PROC SQL;
CREATE TABE MEAN_BP AS
SELECT COUNT(*) AS N,
ROUND(MEAN(BPS), 0.01) FORMAT=6.2 AS BPSMEAN, /* FUNCTIONS USAGE */
ROUND(MEAN(BPD),0.01) FORMAT=6.2 AS BPDMEAN
FROM VITAL_RECS;
QUIT;
PROC SQL;
CREATE TABLE HIGH_BP AS
SELECT PATIENT,
COUNT(PATIENT) AS N,
DATE FORMAT=DATE7.,
MAX(BPD) AS BPDHIGH,
ROUND(BPD/CALCULATED BPDHIGH)*100,0.01) FORMAT=6.2 AS BPD_PCT_DIFF
FROM VITAL_RECS
GROUP BY PATIENT_ID
QUIT;
/* A most powerful SQL code - so far*/
PROC SQL;
CREATE TABLE HIGH_BP AS
SELECT PATIENT,
COUNT(PATIENT) AS N,
DATE FORMAT=DATE7.,
MAX(BPD) AS BPD_HIGH
FROM VITAL_RECS
WHERE PATIENT_ID IN (101 102 103 105) /* SEE NO COMMAS */
GROUP BY PATIENT_ID
HAVING BPD=CALCULATED BPD_HIGH
ORDER BY CALCULATED BPD_HIGH;
QUIT;
/* output */
PATIENT_ID N DATE BPDHIGH
103 4 14AUG01 89
101 3 01JUN01 92
102 3 06AUG01 97
/* end */
PROC SQL;
CREATE TABLE TEST_MED AS
SELECT PATIENT_ID
CASE CASE((PATIENT/2=INT(PATIENT/2)+(PATIENT=.))
WHEN 1 THEN ‘MED A’ WHEN 0 THEN ‘MED B’ ELSE ‘ERROR’
END AS DOSEGRP LENGTH=5
FROM VITAL_RECS
ORDER BY PATIENT_ID;
QUIT;
PROC SQL;
CREATE TABLE LEFT AS
SELECT A.PATIENT,
A.DATE FORMAT=DATE7. AS DATE,
A.PULSE
B.MED
B.DOSES
B.AMT FORMAT=4.1
FROM VITALS A LEFT JOIN DOSING B
ON (A.PATIENT=B.PATIENT) AND A.DATE=B.DATE
ORDER BY PETIENT, DATE;
QUIT;
PROC SQL NOPRINT;
SELECT DISTINCT PATIENT INTO: PAT1 - : PAT999
FROM VITALS
ORDER BY PATIENT;
QUIT;
PROC SQL NOPRINT;
SELECT DISTINCT PATIENT INTO: PATLIST SEPARATED BY ‘,’
FROM VITALS
ORDER BY PATIENT;
QUIT;
PROC SQL NOPRINT;
SELECT SUM(DRUG=1), SUM(DRUG=2), SUM(1 <=DRUG<=2) INTO: N1, :N2, :N3
FROM ANALY
WHERE GENDER=’MALE’;
QUIT;
PROC SQL;
CREATE TABLE set AS
SELECT * FROM one
OUTER UNION /* SIMILAR TO SET A B; */
SELECT * FROM two;
QUIT;
PROC SQL;
CREATE TABLE Both AS
SELECT a.*, b.*
FROM one AS a FULL JOIN
two AS b
ON a.patient=b.patient;
QUIT;
PROC SQL;
CREATE TABLE ifa1 AS
SELECT a.*,
b.treatment
FROM one AS a LEFT JOIN (/* SIMILAR TO “IF A” IN MERGE */)
two AS b
ON a.patient=b.patient;
QUIT;
/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;
PROC SQL SYNTAX
PROC SQL;
CREATE TABLE ifb1 AS
SELECT a.age,
a.sex,
b.*
FROM one AS a RIGHT JOIN
two AS b
QUIT;
/* WHAT IS THE KEY STATEMENT IN AN EQUIVALENT MERGE */;
PROC SQL;
CREATE TABLE Both AS
SELECT a.*, b.*
FROM one AS a INNER JOIN
two AS b
ON a.patient=b.patient;
QUIT;
The most powerful SQL use is for counts and creating macro variables. The syntax is similar for both so I’ll show the macro variable code creation. This small snippet of code has proven invaluable to me and I’ll never go back to using DATA step for this.
PROC SQL NOPRINT;
SELECT TRIM(LEFT(PUT(COUNT(DISTINCT patient),8.))) INTO : trt1 - : trt3
FROM two
GROUP BY treatment;
QUIT;
%PUT trt1=&trt1. trt2=&trt2. trt3=&trt3.;
Log Output
trt1=2 trt2=1 trt3=4
Another favourite tip when using SQL has been to produce the macro variables in a dataset table, and store the view table in a permanent directory, so I can view the numbers whenever needed. To do this, use the following code:
PROC SQL NOPRINT;
CREATE TABLE kir.macro_vars AS
SELECT name, value
FROM sashelp.vmacro
WHERE scope='GLOBAL' AND name IN ('TRT1' 'TRT2' 'TRT3')
ORDER BY 1;
QUIT;
One may open kir.macro_vars to check the macro vars and their values