#-*-Mode: perl;-*-
#
# Report how far the observed type frequency of each rime (vowel + coda)
# departs from the frequency expected if vowels and codas combined
# independently:  E = count(vowel) * count(coda) / number of words.
# The deviation is (O - E)^2 / E, made negative when O < E.  Only rimes
# with E >= 5 are listed, sorted by deviation.
#
# Usage:  perl SCRIPT [name=value ...] < wordlist
#
# Leading name=value arguments are options:
#   collapse=CHARS  vowels in CHARS are re-labelled 'o' when the coda
#                   starts with 'r', and 'A' otherwise
#   zero=N          value substituted for a missing or zero frequency
#                   (only affects the token total, which is not reported)
#
# Input (STDIN): one word per line, tab-separated fields: a transcription
# using the vowel symbols in $vowels, an example string (presumably the
# spelling), and a frequency.
#
# NOTE(review): the copy of this script that was reviewed had lost
# everything between angle brackets (the input handle, and apparently
# HTML markup in the printed text).  The input handle is restored as
# <STDIN>; the table is emitted as tab-separated text.  If the report is
# meant to be HTML, re-add the markup from the original -- TODO confirm.

use strict;
use warnings;

# ---- options ---------------------------------------------------------

# Consume leading name=value arguments into %opt.
my %opt;
while (@ARGV && $ARGV[0] =~ /^(\w+)=(.*)/) {
    $opt{$1} = $2;
    shift @ARGV;
}

# ---- preamble --------------------------------------------------------

print " Deviations in frequency of rimes\n",
      "\n",
      "Deviations from the expectation in the observed frequency of rimes",
      " of English monomorphemic monosyllables\n",
      "\n",
      "Words are from the Random House Dictionary, 2nd ed.",
      " Word frequencies are weighted by the frequencies in the Brown",
      " corpus (Francis & Kucera).",
      " Deviations are (O - E)^2 / E, deriving the expectation from the",
      " frequencies of the vowel and of the coda sequence among observed",
      " codas.",
      " We report only rimes that have an expected frequency of 5 or more.",
      " Frequencies are not weighted by word frequency. ";

# print, not printf: the option text must not be treated as a format.
if (defined $opt{collapse}) {
    print "\n\nTreating vowels $opt{collapse} as A\n";
}
if (defined $opt{zero}) {
    print "\n\nCounting 0 freqs in Brown as $opt{zero}\n";
}

# ---- tally -----------------------------------------------------------

my $vowels = "aAcCeEioORuUVWyY";

# A word is onset + exactly one vowel symbol + coda.
my $word_re = qr/^([^$vowels]*)([$vowels])([^$vowels]*)$/;

# The collapse value is used as the body of a character class, exactly as
# given, so ranges such as "a-e" keep working.
my $collapse_re;
if (defined $opt{collapse} && length $opt{collapse}) {
    $collapse_re = qr/[$opt{collapse}]/;
}

my (%vowel_types, %coda_types, %rime_types, %rime_examples);
my $n_types  = 0;
my $n_tokens = 0;    # accumulated, but not used by the report below

while (my $line = <STDIN>) {
    # Without this the newline ends up in the last field of the line.
    chomp $line;

    my ($pr, $sp, $freq) = split /\t/, $line;
    next if !defined $pr || $pr eq '';

    if (!$freq && defined $opt{zero}) {
        $freq = $opt{zero};
    }

    my (undef, $vowel, $coda) = $pr =~ $word_re;
    if (!defined $vowel) {
        # Tallying an unparsable word would file it under an empty rime
        # and distort the totals, so skip it and say so.
        warn "line $.: cannot split '$pr' into onset/vowel/coda; skipped\n";
        next;
    }

    if ($collapse_re && $vowel =~ $collapse_re) {
        $vowel = $coda =~ /^r/ ? 'o' : 'A';
    }

    my $rime = "$vowel$coda";
    bump(\%vowel_types, $vowel, 1);
    bump(\%coda_types,  $coda,  1);
    bump(\%rime_types,  $rime,  1);
    eg(\%rime_examples, $rime, $sp);

    $n_types++;
    {
        # Frequencies are taken as given; a blank or odd value counts
        # as whatever number Perl reads from it.
        no warnings 'numeric';
        $n_tokens += $freq || 0;
    }
}

# ---- deviations ------------------------------------------------------

# Each row is [deviation, rime, observed, expected].
my @rows;
for my $rime (keys %rime_types) {
    # The vowel is always the first character of the rime key.
    my $vowel    = substr($rime, 0, 1);
    my $coda     = substr($rime, 1);
    my $observed = $rime_types{$rime};
    my $expected = $vowel_types{$vowel} * $coda_types{$coda} / $n_types;
    next if $expected < 5;

    my $dev = ($observed - $expected) ** 2 / $expected;
    $dev = -$dev if $observed < $expected;
    push @rows, [$dev, $rime, $observed, $expected];
}

# ---- report ----------------------------------------------------------

print "\n\n";
print "Deviations in frequency of rime\n";
print join("\t", qw(Dev Rime Observed Expected Examples)), "\n";
for my $row (sort numerically @rows) {
    my ($dev, $rime, $observed, $expected) = @$row;
    my $examples = $rime_examples{$rime};
    $examples = '' unless defined $examples;
    # Data goes in as arguments, never inside the format string.
    printf "%.2f\t%s\t%d\t%.2f\t%s\n",
        $dev, $rime, $observed, $expected, $examples;
}

print "\n\nThis report was generated by a Perl programme. ";

# ---- helpers ---------------------------------------------------------

# Return the smallest of the given numbers, or 100000 when the list is
# empty or every number is at least 100000.  Not called in this file.
sub smallest {
    my $smallest = 100000;
    for my $num (@_) {
        $smallest = $num if $num < $smallest;
    }
    return $smallest;
}

# Sort comparator for report rows: ascending deviation, ties broken by
# rime so that the output order is reproducible.
sub numerically {
    return $a->[0] <=> $b->[0] || $a->[1] cmp $b->[1];
}

# Append $example to the comma-separated example list stored under $key
# in the hash referenced by $table, as long as the list stays under 50
# characters.  A missing or empty example is ignored.
sub eg {
    my ($table, $key, $example) = @_;
    return if !defined $example || $example eq '';

    if (!defined $table->{$key}) {
        $table->{$key} = $example;
        return;
    }
    my $examples = "$table->{$key},$example";
    $table->{$key} = $examples if length($examples) < 50;
    return;
}

# Add $value to the count stored under $key in the hash referenced by
# $table, creating the entry on first use.
sub bump {
    my ($table, $key, $value) = @_;
    $table->{$key} += $value;
    return;
}