(: 
   Finds and counts all distinct words in Shakespeare's plays (assumes that
   Jon Bosak's XML files are accessible; the -docs option can be used for this).

	Shakespeare 2.00 by Jon Bosak can be found at:
	http://metalab.unc.edu/bosak/xml/eg/shaks200.zip

   Copyright X. Franc 2003
:)

(: $plays: the document nodes of the plays, one doc() call per play file. :)
let $plays := (
  doc("a_and_c.xml"),
  doc("all_well.xml"),
  doc("as_you.xml"),
  doc("com_err.xml"),
  doc("coriolan.xml"),
  doc("cymbelin.xml"),
  doc("dream.xml"),
  doc("hamlet.xml"),
  doc("hen_iv_1.xml"),
  doc("hen_iv_2.xml"),
  doc("hen_v.xml"),
  doc("hen_vi_1.xml"),
  doc("hen_vi_2.xml"),
  doc("hen_vi_3.xml"),
  doc("hen_viii.xml"),
  doc("j_caesar.xml"),
  doc("john.xml"),
  doc("lear.xml"),
  doc("lll.xml"),
  doc("m_for_m.xml"),
  doc("m_wives.xml"),
  doc("macbeth.xml"),
  doc("merchant.xml"),
  doc("much_ado.xml"),
  doc("othello.xml"),
  doc("pericles.xml"),
  doc("r_and_j.xml"),
  doc("rich_ii.xml"),
  doc("rich_iii.xml"),
  doc("t_night.xml"),
  doc("taming.xml"),
  doc("tempest.xml"),
  doc("timon.xml"),
  doc("titus.xml"),
  doc("troilus.xml"),
  doc("two_gent.xml"),
  doc("win_tale.xml")
)

(: ---- remove 'count' to see the words (sorted) --- :)
return count(
  let $words :=
     distinct-values(
       for $p in $plays, $n in $p//text()
       (: Split every text node on runs of whitespace, digits, brackets and
          punctuation.  tokenize() yields a zero-length string when the text
          starts or ends with a separator (or consists only of separators);
          the [. ne ""] predicate drops those so that "" is neither counted
          nor listed as a word. :)
       return tokenize($n, "[ \n\r\t\[\]0123456789,:;!.?'-]+")[. ne ""] )
  (: The ordering does not affect the count; it only matters once 'count'
     is removed and the word list itself is returned. :)
  return (for $w in $words order by $w collation "en" return $w)
)
