SQL joins

This is a problem I like to ask candidates for database-heavy positions. I shamelessly stole this question from an answer on the Programmers Stack Exchange

Assume the following table structure:

CREATE TABLE Documents (DocID, DocDate);
CREATE TABLE Keywords (KeyWordID, KeyWord);
CREATE TABLE DocumentKeywords (DocID,KeyWordID);

Write queries to return the following:
Part 1: Documents with a DocDate after 4/1/1995
Part 2: Documents that contain the keyword “Blue”
Part 3: Documents that contain the either the keyword “Blue” or “Yellow”
Part 4: Documents that contain both the keywords “Blue” and “Yellow”

Solutions

-- Some test data:
BEGIN TRANSACTION;
CREATE TABLE Documents (DocID, DocDate);
INSERT INTO "Documents" VALUES(0,'1/1/1995');
INSERT INTO "Documents" VALUES(1,'4/1/1995');
INSERT INTO "Documents" VALUES(2,'4/5/1995');
INSERT INTO "Documents" VALUES(3,'4/6/1995');
INSERT INTO "Documents" VALUES(4,'4/7/1995');
CREATE TABLE Keywords (KeyWordID, KeyWord);
INSERT INTO "Keywords" VALUES(0,'Blue');
INSERT INTO "Keywords" VALUES(1,'Yellow');
CREATE TABLE DocumentKeywords (DocID,KeyWordID);
INSERT INTO "DocumentKeywords" VALUES(1,0);
INSERT INTO "DocumentKeywords" VALUES(2,1);
INSERT INTO "DocumentKeywords" VALUES(3,0);
INSERT INTO "DocumentKeywords" VALUES(3,1);
INSERT INTO "DocumentKeywords" VALUES(4,0);
INSERT INTO "DocumentKeywords" VALUES(4,1);
COMMIT;

-- Part 1: Documents with a DocDate after 4/1/1995
SELECT * FROM Documents WHERE DocDate > '4/1/1995';

-- Part 2: Documents that contain the keyword "Blue"
SELECT * FROM Documents
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue';

-- or
SELECT * 
FROM   documents 
WHERE  docid IN (SELECT docid 
                 FROM   documentkeywords 
                 WHERE  keywordid IN (SELECT keywordid 
                                      FROM   keywords 
                                      WHERE  keyword = 'Blue'))

-- Part 3: Documents that contain the either the keyword "Blue" or "Yellow"
SELECT * FROM Documents
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
-- optionally
GROUP BY DocID;

-- Part 4: Documents that contain the both the keywords "Blue" and "Yellow"

-- using GROUP BY and MAX()
SELECT *
  FROM Documents
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
GROUP BY DocID
HAVING max(KeyWord = 'Yellow') = 1 AND max(KeyWord = 'Blue') = 1

-- the above query being a reduction of this query, which is perhaps clearer
SELECT *, KeyWord = 'Yellow' AS yellow, KeyWord = 'Blue' as blue
  FROM Documents
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
GROUP BY DocID
HAVING max(yellow) = 1 AND max(blue) = 1

-- using GROUP BY and COUNT() (fails on duplicate rows in DocumentKeywords)
SELECT *
  FROM Documents
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
GROUP BY DocID
HAVING count(*) = 2

-- by joining on DocumentKeywords and Keywords twice
SELECT * FROM Documents d
  JOIN DocumentKeywords dkblue   USING (DocID)
  JOIN DocumentKeywords dkyellow USING (DocID)
  JOIN Keywords kblue   ON (dkblue.KeyWordID = kblue.KeyWordID)
  JOIN Keywords kyellow ON (dkyellow.KeyWordID = kyellow.KeyWordID)
  WHERE kyellow.KeyWord = 'Yellow' AND kblue.KeyWord = 'Blue'
  
-- using subselects
SELECT * FROM Documents d
  WHERE DocID IN (SELECT DocID
                  FROM   DocumentKeywords
                  WHERE  KeyWordId IN (SELECT KeyWordId
                                       FROM   Keywords
                                       WHERE  KeyWord = 'Blue'))
  AND   DocID IN (SELECT DocID
                  FROM   DocumentKeywords
                  WHERE  KeyWordId IN (SELECT KeyWordId
                                       FROM   Keywords
                                       WHERE  KeyWord = 'Yellow'));
                                      
-- using INTERSECT and subselects
SELECT * FROM Documents
WHERE DocID IN (SELECT DocID
                FROM   DocumentKeywords
                WHERE  KeyWordId IN (SELECT KeyWordId
                                     FROM   Keywords
                                     WHERE  KeyWord = 'Blue'))
INTERSECT 
SELECT * FROM Documents
WHERE DocID IN (SELECT DocID
                FROM   DocumentKeywords
                WHERE  KeyWordId IN (SELECT KeyWordId
                                     FROM   Keywords
                                     WHERE  KeyWord = 'Yellow'));

-- using INTERSECT and JOIN                                  
SELECT d.* FROM Documents d
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue'
INTERSECT
SELECT d.* FROM Documents d
  JOIN DocumentKeywords USING (DocID)
  JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Yellow';

-- solution from ANONYMOUS
SELECT
  DISTINCT b.docid
FROM
  (SELECT
     d.DocID
   FROM
     Keywords k JOIN DocumentKeywords d
     ON k.KeywordID = d.KeywordID
     WHERE k.KeyWord = 'Blue') b
INNER JOIN
  (SELECT
     d.DocID
   FROM
     Keywords k JOIN DocumentKeywords d
     ON k.KeywordID = d.KeywordID
     WHERE k.KeyWord = 'Yellow') y
ON
  b.DocID = y.DocID;

Leave a Reply

Your email address will not be published.

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

 
To comment, click below to log in.