This is a problem I like to ask candidates for database-heavy positions. I shamelessly stole this question from an answer on the Programmers Stack Exchange
Assume the following table structure:
CREATE TABLE Documents (DocID, DocDate);
CREATE TABLE Keywords (KeyWordID, KeyWord);
CREATE TABLE DocumentKeywords (DocID,KeyWordID);
Write queries to return the following:
Part 1: Documents with a DocDate after 4/1/1995
Part 2: Documents that contain the keyword “Blue”
Part 3: Documents that contain the either the keyword “Blue” or “Yellow”
Part 4: Documents that contain both the keywords “Blue” and “Yellow”
Solutions
-- Some test data:
BEGIN TRANSACTION;
CREATE TABLE Documents (DocID, DocDate);
INSERT INTO "Documents" VALUES(0,'1/1/1995');
INSERT INTO "Documents" VALUES(1,'4/1/1995');
INSERT INTO "Documents" VALUES(2,'4/5/1995');
INSERT INTO "Documents" VALUES(3,'4/6/1995');
INSERT INTO "Documents" VALUES(4,'4/7/1995');
CREATE TABLE Keywords (KeyWordID, KeyWord);
INSERT INTO "Keywords" VALUES(0,'Blue');
INSERT INTO "Keywords" VALUES(1,'Yellow');
CREATE TABLE DocumentKeywords (DocID,KeyWordID);
INSERT INTO "DocumentKeywords" VALUES(1,0);
INSERT INTO "DocumentKeywords" VALUES(2,1);
INSERT INTO "DocumentKeywords" VALUES(3,0);
INSERT INTO "DocumentKeywords" VALUES(3,1);
INSERT INTO "DocumentKeywords" VALUES(4,0);
INSERT INTO "DocumentKeywords" VALUES(4,1);
COMMIT;
-- Part 1: Documents with a DocDate after 4/1/1995
SELECT * FROM Documents WHERE DocDate > '4/1/1995';
-- Part 2: Documents that contain the keyword "Blue"
SELECT * FROM Documents
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue';
-- or
SELECT *
FROM documents
WHERE docid IN (SELECT docid
FROM documentkeywords
WHERE keywordid IN (SELECT keywordid
FROM keywords
WHERE keyword = 'Blue'))
-- Part 3: Documents that contain the either the keyword "Blue" or "Yellow"
SELECT * FROM Documents
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
-- optionally
GROUP BY DocID;
-- Part 4: Documents that contain the both the keywords "Blue" and "Yellow"
-- using GROUP BY and MAX()
SELECT *
FROM Documents
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
GROUP BY DocID
HAVING max(KeyWord = 'Yellow') = 1 AND max(KeyWord = 'Blue') = 1
-- the above query being a reduction of this query, which is perhaps clearer
SELECT *, KeyWord = 'Yellow' AS yellow, KeyWord = 'Blue' as blue
FROM Documents
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
GROUP BY DocID
HAVING max(yellow) = 1 AND max(blue) = 1
-- using GROUP BY and COUNT() (fails on duplicate rows in DocumentKeywords)
SELECT *
FROM Documents
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue' OR KeyWord = 'Yellow'
GROUP BY DocID
HAVING count(*) = 2
-- by joining on DocumentKeywords and Keywords twice
SELECT * FROM Documents d
JOIN DocumentKeywords dkblue USING (DocID)
JOIN DocumentKeywords dkyellow USING (DocID)
JOIN Keywords kblue ON (dkblue.KeyWordID = kblue.KeyWordID)
JOIN Keywords kyellow ON (dkyellow.KeyWordID = kyellow.KeyWordID)
WHERE kyellow.KeyWord = 'Yellow' AND kblue.KeyWord = 'Blue'
-- using subselects
SELECT * FROM Documents d
WHERE DocID IN (SELECT DocID
FROM DocumentKeywords
WHERE KeyWordId IN (SELECT KeyWordId
FROM Keywords
WHERE KeyWord = 'Blue'))
AND DocID IN (SELECT DocID
FROM DocumentKeywords
WHERE KeyWordId IN (SELECT KeyWordId
FROM Keywords
WHERE KeyWord = 'Yellow'));
-- using INTERSECT and subselects
SELECT * FROM Documents
WHERE DocID IN (SELECT DocID
FROM DocumentKeywords
WHERE KeyWordId IN (SELECT KeyWordId
FROM Keywords
WHERE KeyWord = 'Blue'))
INTERSECT
SELECT * FROM Documents
WHERE DocID IN (SELECT DocID
FROM DocumentKeywords
WHERE KeyWordId IN (SELECT KeyWordId
FROM Keywords
WHERE KeyWord = 'Yellow'));
-- using INTERSECT and JOIN
SELECT d.* FROM Documents d
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Blue'
INTERSECT
SELECT d.* FROM Documents d
JOIN DocumentKeywords USING (DocID)
JOIN Keywords USING (KeyWordID)
WHERE KeyWord = 'Yellow';
-- solution from ANONYMOUS
SELECT
DISTINCT b.docid
FROM
(SELECT
d.DocID
FROM
Keywords k JOIN DocumentKeywords d
ON k.KeywordID = d.KeywordID
WHERE k.KeyWord = 'Blue') b
INNER JOIN
(SELECT
d.DocID
FROM
Keywords k JOIN DocumentKeywords d
ON k.KeywordID = d.KeywordID
WHERE k.KeyWord = 'Yellow') y
ON
b.DocID = y.DocID;
