=for comment

For each file named on command line, the file is read as a 
     CSV data file.

Top line is read as header giving names of columns. 
Each remaining line gives the trials for an individual subject.
The fields are processed as follows:
   * "name" (any case) - subjects' names. Normalized to all-uppercase,
     diacritics removed (from á, Á, ã, â, é, í, ó, ú, Ú)
   * a single capital letter - trial when that letter was stimulus.
     Normalized to uppercase.
     If reply is not a single letter, it is treated as null and ignored.
   * "Nletter", "sex" - ignored

For each subject:

  The number of letter responses is tallied ($subjNErrors).

  For each letter of the alphabet (as implied from headers):

       If subject's name starts with this letter, 
         environment $ed for this letter is "initial";
       else if letter appears elsewhere in subject's name,
         $ed is "noninitial";
       else (letter appears nowhere in subject's name)
         $ed is "nowhere".

       "Kids" counter is incremented for $ed

       "AnyErr" counter for $ed is increased by $subjNErrors

       For all trials with this subject:
     
         If the response matches this letter, then the "ThisErr"
             counter is incremented for letter-environment $ed

Data counts are printed out for each letter-environment-counter.

=cut

use strict;
use warnings;

use constant DEBUG => 0;

our $norm = {
  q{á} => q{A},
  q{Á} => q{A},
  q{ã} => q{A},
  q{â} => q{A},
  q{é} => q{E},
  q{í} => q{I},
  q{ó} => q{O},
  q{ú} => q{U},
  q{Ú} => q{U},
};

sub Normalize_Letter($) {
  my $letter = shift(@_);
  if ($letter =~ m{^[A-Z]$}) {return $letter;}
  if ($letter =~ m{^[a-z]$}) {return uc($letter);}
  my $tr = $norm->{$letter};
  if (!defined($tr)) {
    die(qq{How to normalize? {$letter}});
  }
  return $tr;
}

sub Normalize_Name($) {
  my $name = shift(@_);
  eval {$name =~ s{(á|Á|ã|â|é|í|ó|ú|Ú|.)}{Normalize_Letter($1)}eg;};
  if ($@) {die(qq{$@\nIn name {$name}\n});}
  $name;
}

package EnvData;
use fields qw{nKidsInGroup nErrorsThisLetter nErrorsAnyLetter};

sub new {
  my $type = shift(@_);
  my $ed = fields::new($type);
  $ed->{nKidsInGroup} = 0;
  $ed->{nErrorsThisLetter} = 0;
  $ed->{nErrorsAnyLetter} = 0;
  return $ed;
}

package LetterData;
use fields qw{letter initial noninitial nowhere};

sub new {
  my $type = shift(@_);
  my $letter = shift(@_);
  my $ld = fields::new($type);
  $ld->{letter} = $letter;
  $ld->{initial} = EnvData->new();
  $ld->{noninitial} = EnvData->new();
  $ld->{nowhere} = EnvData->new();
  return $ld;
}

package main;

sub Analyze_Children($$) {
  my($alphabet, $lines) = @_;
  my $nletters = scalar(@$alphabet);
  if (DEBUG) {print(qq{  $nletters letters in alphabet.\n});}
  my $data = [];
  foreach my $letter (@$alphabet) {
    push(@$data, LetterData->new($letter));
  }
  my $nSubjects = 0;
  foreach my $line (@$lines) {
    my $name = $line->{name};
    if (DEBUG) {print(qq{  $name\n});}
    $nSubjects++;
    my $responsa = $line->{letters};
    my $subjNErrors = 0;
    foreach my $response (@$responsa) {
      $subjNErrors++ if defined($response);
    }
    if (DEBUG) {print(qq{    $subjNErrors letter-name errors\n});}
    foreach my LetterData $ld (@$data) {
      my $letter = $ld->{letter};
      if (DEBUG) {print(qq{    $letter\n});}
      my EnvData $ed;
      if ($name =~ m{^$letter}) {
        if (DEBUG) {print(qq{      initial\n});}
        $ed = $ld->{initial};
      } elsif ($name =~ m{$letter}) {
        if (DEBUG) {print(qq{      noninitial\n});}
        $ed = $ld->{noninitial};
      } else {
        if (DEBUG) {print(qq{      nowhere\n});}
        $ed = $ld->{nowhere};
      }
      $ed->{nKidsInGroup}++;
      $ed->{nErrorsAnyLetter} += $subjNErrors;
      foreach my $response (@$responsa) {
        next unless defined($response);
        if ($response eq $letter) {
          $ed->{nErrorsThisLetter}++;
          if (DEBUG) {print(qq{    Own name letter\n});}
        }
      }
    }
  }
  print(qq{"$nSubjects subjects:"\r\n});
  print(qq{Letter,"[..","..",Init,"..]","[..","..",Noninit,"..]","[..","..",Nowhere,"..]"\r\n});
  print(qq{,Kids,ThisErr,AnyErr,Prop,Kids,ThisErr,AnyErr,Prop,Kids,ThisErr,AnyErr,Prop\r\n});
  foreach my LetterData $letter_counts (@$data) {
    my $letter = $letter_counts->{letter};
    print($letter);
    foreach my $pos (qw{initial noninitial nowhere}) {
      my $pos_counts = $letter_counts->{$pos};
      my $all = $pos_counts->{nKidsInGroup};
      my $this = $pos_counts->{nErrorsThisLetter};
      my $any = $pos_counts->{nErrorsAnyLetter};
      my $prop = ($any > 0) ? sprintf(q{%.3f}, $this / $any) : q{NA};
      print(qq{,$all,$this,$any,$prop});
    }
    print(qq{\r\n});
  }
}

sub Analyze_File($) {
  my $file_name = shift(@_);
  open(my $in, q{<}, $file_name) or die($file_name);
  my $header = <$in>;
  chomp($header);
  my $header_fields = [split(m{,}, $header)];
  my $n_fields = scalar(@$header_fields);
  my $alphabet = [];
  foreach my $hf (@$header_fields) {
    if ($hf =~ m{^[A-Z]$}) {push(@$alphabet, $hf);}
  }
  my $nLetters = scalar(@$alphabet);
  my $lines = [];
  while (defined(my $line = <$in>)) {
    next unless $line =~ m{\S};
    chomp($line);
    my $data_fields = [split(m{,}, $line, -1)];
    my $line_data = {letters => []};
    my $nCorrect = 0;
    push(@$lines, $line_data);
    for (my $f = 0; $f < $n_fields; $f++) {
      my $tag = $header_fields->[$f];
      my $value = $data_fields->[$f];
      if ($tag =~ m{name}i) {
        $line_data->{name} = Normalize_Name($value);
      } elsif ($tag eq q{Nletters}) {
      } elsif ($tag =~ m{^[A-Z]$}) {
        my $answer = ($value =~ m{^[A-Za-z]$}) ? uc($value) : undef;
        push(@{$line_data->{letters}}, $answer);
      } elsif ($tag eq q{sex}) {
      } else {die(qq{$tag: $value});}
    }
  }
  close($in);
  Analyze_Children($alphabet, $lines);
}

sub main() {
  print(qq{"For each letter of the alphabet,"\r
" we look at children who have names that start with, otherwise contain,"\r
"  or lack, that letter. For those children, how many of their letter"\r
"  name errors were this letter? How many of their letter name"\r
"  errors were any real letter? What proportion of the latter is the former?"\r\n});
  foreach my $file_name (@ARGV) {
    my($base_name) = $file_name =~ m{^(.+?)\.};
    print(qq{"$base_name:"\r\n});
    Analyze_File($file_name);
    print(qq{\r\n});
  }

}

main();
