use strict;
use warnings;

our $data = [];

sub loadData() {
  # skip comments and header:
  while (defined(my $line = <>)) {
    last if $line =~ m{^end,};
  }
  while (defined(my $line = <>)) {
    $line =~ s{\s+$}{};
    my $fields = [split(m{,}, $line)];
    for (my $f = 7; $f < 12; $f++) {
      my $orig = $fields->[$f];
      $fields->[$f] = ($orig =~ m{\d}) ? log($orig + 2) : 0;
    }
    push(@$data, $fields);
  }
}

sub newCondCounts() {
  return {typesCMU => 0, typesZeno => 0, freqs => [0,0,0,0,0]};
}

sub newCounts() {
  return {
    condC => newCondCounts(), 
    typicC => newCondCounts(), 
    oddC => newCondCounts()};
}

sub satisfies($$$$$$$) {
  my($word, $end, $Cpr, $modality, $condTyp, $tested, $nsylls) = @_;
   #print(join(q{,}, @$word), qq{($end, $Cpr, $modality, $condTyp, $tested, $nsylls)\n});
  return 0 if defined($end) and $end ne $word->[0];
  return 0 if defined($Cpr) and qq{/$Cpr/} ne $word->[1];
  if ($modality eq q{Pr}) {
    if ($condTyp eq q{cond}) {
      return 0 if $word->[2] ne q{condVPr};
    } else {  #typ
      return 0 if $word->[2] ne q{typVPr};
    }
  } else {  #Sp
    if ($condTyp eq q{cond}) {
      return 0 if $word->[5] ne q{condVSp};
    } else {  #typ
      return 0 if $word->[5] ne q{typVSp};
    }
  }
  return 0 if defined($tested) and $tested ne $word->[3];
  return 0 if defined($nsylls) and $nsylls != $word->[4];
  return 1;
}

sub tally($$) {
  my($counts, $word) = @_;
  my $result = $word->[6];
  my $c = $counts->{$result};
  $c->{typesCMU}++;
  if ($word->[7]) {$c->{typesZeno}++;}
  for (my $f = 0; $f < 5; $f++) {
    $c->{freqs}->[$f] += $word->[$f + 7];
  }
}

sub reportCountsCond($) {
  my($cc) = @_;
  printf(qq{Types CMU=%4d}, $cc->{typesCMU});
  printf(qq{, Zeno=%4d}, $cc->{typesZeno});
  printf(qq{; freqs Zeno=%5.1f}, $cc->{freqs}->[0]);
  printf(qq{, G2=%4.1f}, $cc->{freqs}->[1]);
  printf(qq{, G3=%4.1f}, $cc->{freqs}->[2]);
  printf(qq{, G5=%4.1f}, $cc->{freqs}->[3]);
  printf(qq{, G13=%4.1f}, $cc->{freqs}->[4]);
  print(qq{\n});
}

sub prop($$) {
  my($this, $other) = @_;
  my $denom = $this + $other;
  if ($denom) {
    my $s = sprintf(q{%.3f}, $this / $denom);
    if (!($s =~ s{^0}{})) {$s =~ s{0$}{};};
    return $s;
  } else {return q{---};}
}

sub reportProportion($) {
  my($counts) = @_;
  my $condCC = $counts->{condC};
  my $typCC = $counts->{typicC};
  printf(qq{Types CMU=%s}, prop($condCC->{typesCMU}, $typCC->{typesCMU}));
  printf(qq{, Zeno=%s}, prop($condCC->{typesZeno}, $typCC->{typesZeno}));
  printf(qq{; freqs Zeno= %s}, prop($condCC->{freqs}->[0], $typCC->{freqs}->[0]));
  printf(qq{, G2=%s}, prop($condCC->{freqs}->[1], $typCC->{freqs}->[1]));
  printf(qq{, G3=%s}, prop($condCC->{freqs}->[2], $typCC->{freqs}->[2]));
  printf(qq{, G5=%s}, prop($condCC->{freqs}->[3], $typCC->{freqs}->[3]));
  printf(qq{, G13=%s}, prop($condCC->{freqs}->[4], $typCC->{freqs}->[4]));
  print(qq{\n});
}

sub reportCounts($) {
  my($counts) = @_;
  print(q{          Conditioned: }); reportCountsCond($counts->{condC});
  print(q{          Typical:     }); reportCountsCond($counts->{typicC});
  print(q{          Cond Prop:   }); reportProportion($counts);
  print(q{          Odd:         }); reportCountsCond($counts->{oddC});
}

sub reportCaseModCondTestedSylls($$$$$$) {
  my($end, $Cpr, $modality, $condTyp, $tested, $nsylls) = @_;
  if (defined($nsylls)) {
    print(qq{        Restricted to words of $nsylls syllables:\n});
  } else {
    print(qq{        All syllable counts:\n});
  }
  my $counts = newCounts();
  foreach my $word (@$data) {
    if (satisfies($word, $end, $Cpr, $modality, $condTyp, $tested, $nsylls)) {
      tally($counts, $word);
    }
  }
  reportCounts($counts);
}

sub reportCaseModCondTested($$$$$) {
  my($end, $Cpr, $modality, $condTyp, $tested) = @_;
  if (defined($tested)) {
    print(qq{      Tested vowels only:\n});
  } else {
    print(qq{      All relevant vowels:\n});
  }
  reportCaseModCondTestedSylls($end, $Cpr, $modality, $condTyp, $tested, 1);
  reportCaseModCondTestedSylls($end, $Cpr, $modality, $condTyp, $tested, undef);  
}

sub reportCaseModCond($$$$) {
  my($end, $Cpr, $modality, $condTyp) = @_;
  print(q{    });
  if ($condTyp eq q{cond}) {
    print(qq{Conditioning environment});
    if (defined($end)) {
      if ($end eq q{final}) {
        if ($modality eq q{Pr}) {
          print(qq{ (Short vowels)});
        } else {
          print(qq{ (Single vowel letter)});
        }
      }
    }
  } else {
    print(qq{Typical environment});
    if (defined($end)) {
      if ($end eq q{final}) {
        if ($modality eq q{Pr}) {
          print(qq{ (Long vowels)});
        } else {
          print(qq{ (Multiple vowel letters)});
        }
      }
    }
  }
  print(qq{:\n});
  
  reportCaseModCondTested($end, $Cpr, $modality, $condTyp, q{strict});
  reportCaseModCondTested($end, $Cpr, $modality, $condTyp, undef);
}

sub reportCaseMod($$$) {
  my($end, $Cpr, $modality) = @_;
  print(qq{  As conditioned by vowel },
     (($modality eq q{Pr}) ? q{Pronunciation} : q{Spelling}),
     qq{:\n});
  reportCaseModCond($end, $Cpr, $modality, q{cond});
  reportCaseModCond($end, $Cpr, $modality, q{typ});
}

sub reportCase($$) {
  my($end, $Cpr) = @_;
  if (defined($end)) {
    print((($end eq q{final}) ? q{Coda} : q{Onset}), qq{ case /$Cpr/:\n});
  } else {
    print(qq{All cases combined:\n});
  }
  reportCaseMod($end, $Cpr, q{Pr});
  reportCaseMod($end, $Cpr, q{Sp});
}

sub doReports() {
  reportCase(q{final}, q{f});
  reportCase(q{final}, q{k});
  reportCase(q{final}, q{l});
  reportCase(q{final}, q{tS});
  reportCase(q{initial}, q{k});
  reportCase(undef, undef);
}

sub main() {
  loadData();
  doReports();
}

main();