;; The first three lines of this file were inserted by DrRacket. They record metadata
;; about the language level of this file in a form that our tools can easily process.
#reader(lib 2017-fall-reader.rkt csc104)((modname authorship) (compthink-settings #hash((prefix-types? . #f))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; In this project you will practice manipulating lists by measuring some
;;; statistics from lists of words. At the very end of the file we provide
;;; some *loooong* lists of words (books) to experiment with, in order to
;;; see whether the statistics can distinguish between works by
;;; Jane Austen, Charles Dodgeson, Charles Dickens, and the brothers Grimm.
;;;
;;; Your job is to complete the code below. Everywhere you find XXX, there
;;; is either a check-expect expression or a function to implement or fix
;;; up.
;;;
;;; Work from top to bottom. Get one thing working before you move to the
;;; next. One good technique is to use the expression comment to disable
;;; the check-expects you are not yet ready to deal with, for example:
;;;
#;(check-expect (f x) 17)
;;;
;;; Now you can click Run without seeing the output from many, many
;;; check-expects.
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Some useful constant definitions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; punctuation is a string containing punctuation we
; expect to encounter
(define punctuation !,;:.-?)([]<>*#
tr)
; end-of-sentence punctuation is a string containing
; punctuation we would expect at the end of a sentence
(define terminal-punctuation .!?)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Some useful definitions for string manipulation
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; chop-last-character : string -> string
; Produce s with last character removed, assuming s has
; at least one character.
(check-expect (chop-last-character 0) )
(check-expect (chop-last-character 01) 0)
(check-expect (chop-last-character five)
(substring five 0
(- (string-length five)
1)))
;;; XXX use the check-expect above to fix chop-last-character
;;; definition below
(define (chop-last-character s)
)
; last-character : string -> string
; Return the last character of s. Assume that s
; is at least 1 character long
(check-expect (last-character {anf..!) !)
;;; XXX write a full-design check-expect for (last-character 012)
;;; that uses substring and string-length to produce the
;;; last character of 012
;;; XXX use your check-expect (above) as a guide to fixing
;;; the body of the definition of last-character
(define (last-character s)
)
; strip-trailing-punctuation : string -> string
; Produce a new string with all of trailing punction
; removed from s.
(check-expect (strip-trailing-punctuation one-man-band..!) one-man-band)
(define (strip-trailing-punctuation s)
(cond
;;; XXX question/answer pair for s being an empty string
[#true empty]
;;; XXX question/answer pair if the last character of s is punctuation
[#true !#]
[else ]))
; strip-leading-punctuation : string -> string
; Produce a new string with all leading punctuation removed from s.
(check-expect (strip-leading-punctuation [->#one-man) one-man)
;;; XXX imitate strip-trailing-punctuation (above) to fix
;;; strip-leading-punctuation (below)
(define (strip-leading-punctuation s)
(cond
[#true ]
[#true ]
[else ]))
; non-empty-string? : string -> bool
; Return #true if s is non-empty, #false otherwise
(check-expect (non-empty-string? ) #false)
(check-expect (non-empty-string? ) #true)
;;; XXX produce a full-design check-expect for (non-empty-string? 1)
;;; by using an expression that compares the length of 1 to 0
;;; Then fix function non-empty-string? (below)
(define (non-empty-string? s)
#false)
; sanitize : string -> string
; Produce a new string from s with all leading and trailing punctuation
; stripped and all upper-case characters in lower case;
(check-expect (sanitize [->#oNe:Man-BAnd<-]) one:man-band)
;;; Heres a full-design check-expect for sanitize
(check-expect (sanitize -AbC:dE-)
(strip-leading-punctuation
(strip-trailing-punctuation
(string-lower-case -AbC:dE-))))
;;; XXX Use the check-expect (above) to fix the definition of
;;; sanitize (below)
(define (sanitize s)
)
; unit-terminator? string string -> boolean
; Return whether s terminates a textual unit
; based on whether it contains any of the terminal punctuation
; in t.
(check-expect (unit-terminator? the end .!?) #false)
(check-expect (unit-terminator? fin! .!?) #true)
(define (unit-terminator? s t)
(cond
[(zero? (string-length s)) #false]
;;; XXX else/answer pair using lists-intersect? on s and t
;;; turned into lists of length-1 strings, using string->list
[else false]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; some useful functions for list, list-of-string,
;;; and list-of-list-of-string
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; lists-intersect? : list list -> boolean
; Does list-1 have any elements in common with list-2?
(check-expect (lists-intersect? (list) (list 5)) #false)
(check-expect (lists-intersect? (list 1 5) (list 5 6)) #true)
(define (lists-intersect? list-1 list-2)
(cond
;;; XXX question/answer for empty list-1
[#true #false]
;;; XXX else either the first element of list-1 is member? list-2 or
;;; the rest of list-1 intersects list-2
[else #false]))
; unit+word : list-of-string string -> list-of-string
; Produce new list-of-string by appending word to unit.
(check-expect (unit+word (list my dog) hAs.!)
(list my dog hAs.!))
(check-expect (unit+word (list one two) three)
(append (list one two) (list three)))
;;; XXX use the full-design check-expect above to guide you
;;; in fixing unit+word below
(define (unit+word unit word)
(list ))
; unit-list+unit : list-of-list-of-string list-of-string
; -> list-of-list-of-string
; Produce a new units, by appending new-unit.
(check-expect (unit-list+unit
(list (list what is) (list up now))
(list doc ))
(append (list (list what is) (list up now)) (list (list doc ))))
;;; XXX use the full-design check-expect above to guide you in fixing
;;; unit-list+unit below
(define (unit-list+unit units new-unit)
(list (list )))
; word-list->text-unit-list :
; list-of-string list-of-string list-of-list-of-string string
; -> list-of-list-of-string
; Produces units, a list of text units, from words, a list of
; words, and next-unit containing the next unit to add to units,
; where terminators contains the punctuation that terminates a unit.
(check-expect (word-list->text-unit-list
(list My dOg, haS . fleas! Okay)
(list) (list) .?!)
(list (list My dOg, haS .) (list fleas!) (list Okay)))
(define (word-list->text-unit-list words next-unit units terminators)
(cond
; no more words, next-unit already added
;;; XXX question/answer if both next-unit and words are empty produce units
[#true (list (list ))]
; no more words, next unit terminated by running out of words
;;; XXX question/answer if just words is empty add next-unit to units
[#true (list (list ))]
; next unit terminated by word containing terminal punctuation
[(unit-terminator? (first words) terminators)
(word-list->text-unit-list
(rest words)
(list)
(unit-list+unit units (unit+word next-unit (first words))) terminators)]
; next sentence is still being built
[else
(word-list->text-unit-list
(rest words) (unit+word next-unit (first words)) units terminators)]))
; word-roster : list-of-string -> list-of-string
; Return a sorted list of unique words in words
(check-expect (word-roster (list my dOg has fleas he has) (list))
(list dog fleas has he my))
(define (word-roster words roster)
(cond
[(empty? words) (sort roster string<?)]
[else
(word-roster
(rest words)
(cond
[(member? (sanitize (first words)) roster) roster]
[else (list* (sanitize (first words)) roster)]))]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; statistics on lists of words
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; average-word-length : list-of-string -> natural
; Return the average length of standardized words in word-list
(check-expect (average-word-length (list [!one two!! three?)) 11/3)
(define (average-word-length word-list)
(local
[(define sanitized-word-list
(filter non-empty-string? (map sanitize word-list)))]
(cond
[(zero? (length word-list)) 0]
[else
(/ (apply + (map string-length sanitized-word-list))
(length sanitized-word-list))])))
; average-sentence-length : list-of-string -> number
; Return the average number of words in each sentence of words.
(check-expect
(average-sentence-length (list My dOg, haS . fleas!] Okay))
5/3)
(define (average-sentence-length words)
(local [(define sentence-list (word-list->text-unit-list words (list) (list) .!?))
(define word-list (filter non-empty-string? (map sanitize words)))]
;;; XXX divide the length of word list by length of sentence-list
0))
; type-token-ratio : list-of-string -> number
; Return the ratio of unique words to total words
(check-expect (type-token-ratio
(list my dog has fleas he has))
5/6)
(define (type-token-ratio words)
;;; XXX divide length of word-roster of words by the number of sanitized
;;; words in words
(/ (length (word-roster words (list)))
0))
; hapax-legomena : list-of-string -> list-of-string
; Produce a list of words that occur exactly once
; from word-list. Assume word-list is sanitized and
; non-empty.
(check-expect (hapax-legomena
(list one two one three one) (list) (list))
(list three two))
(define (hapax-legomena word-list seen-once seen-twice)
(cond
[(empty? word-list)
(local [(define (not-twice w) (not (member? w seen-twice)))]
(filter not-twice seen-once))]
[(member? (first word-list) seen-once)
(hapax-legomena
(rest word-list) seen-once (list* (first word-list) seen-twice))]
[else
(hapax-legomena
(rest word-list) (list* (first word-list) seen-once) seen-twice)]))
; hapax-legomena-ratio : list-of-string -> number
; Return the ratio of the number of words that occur exactly once
; over the number of distinct words in word-list
(check-expect (hapax-legomena-ratio (list one two one three one))
2/3)
(define (hapax-legomena-ratio word-list)
;;; XXX divide the length of hapax-legoma of sanitized word list with all removed
;;; by the length of the word-roster of word-list
0)
; average-sentence-complexity : list-of-string -> number
; Return the average number of clauses per sentence in word-list
(check-expect (average-sentence-complexity
(list The time has come, the
Walrus said To talk of
many things: of shoes –
and ships – and sealing
wax, Of cabbages; and kings.
And why the sea is boiling
hot; and whether pigs have
wings))
3.5)
(define (average-sentence-complexity words)
(local
[(define sentences (word-list->text-unit-list words (list) (list) .!?))
(define (clausify s) (word-list->text-unit-list s (list) (list) ;,:))
(define clauses (map clausify sentences))]
;;; XXX divide the total length of clauses by the length of sentences
0))
; text-signature : list-of-string -> list-of-number
; Produce a list of average-word-length,
; average-sentence-length, type-token-ratio,
; hapax-legomena-ratio, and average-sentence-complexity
; from words
(check-expect (text-signature
(list The time has come, the
Walrus said To talk of
many things: of shoes –
and ships – and sealing
wax, Of cabbages; and kings.
And why the sea is boiling
hot; and whether pigs have
wings))
(list 139/34 17 14/17 6/7 3.5))
(define (text-signature words)
;;; XXX list of average word length, average sentence length,
;;; type-token ratio, hapax-legomena-ratio, average sentence complexity
(list 0 0 0 0 0))
; text-signature-difference : list-of-string list-of-string -> number
; Report how different words1 and words2 based on
; feature-weights.
(check-expect (text-signature-difference
(list The time has come, the
Walrus said To talk of
many things: of shoes –
and ships – and sealing
wax, Of cabbages; and kings.
And why the sea is boiling
hot; and whether pigs have
wings)
(list The time has come, the
Walrus said To talk of
many things: of shoes –
and ships – and sealing
wax, Of cabbages; and kings.
And why the sea is boiling
hot; and whether pigs have
wings)
(list 1 1 1 1 1))
1)
(check-expect (text-signature-difference
(list my dOg has fleas he has)
(list The time has come, the
Walrus said To talk of
many things: of shoes –
and ships – and sealing
wax, Of cabbages; and kings.
And why the sea is boiling
hot; and whether pigs have
wings)
(list 1 1 1 1 1))
32692/49623)
(define (text-signature-difference words1 words2 feature-weights)
(local
[(define (absolute-ratio x)
(cond
[(<= x 1) x]
[else (/ 1 x)]))]
(/ (apply +
(map absolute-ratio
(map /
(text-signature words1)
(text-signature words2))))
(apply + feature-weights))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; book-length lists of strings to experiment with
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; batch-io provides function read-words to read text from computer files
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;(require 2htdp/batch-io)
;(define mystery0 (read-words mystery_files/mystery0.txt))
;(define mystery1 (read-words mystery_files/mystery1.txt))
;(define mystery2 (read-words mystery_files/mystery2.txt))
;(define mystery3 (read-words mystery_files/mystery3.txt))
;(define mystery4 (read-words mystery_files/mystery4.txt))
;(define mystery5 (read-words mystery_files/mystery5.txt))
Reviews
There are no reviews yet.