#! /usr/bin/env perl
##
## Copyright (C) by Argonne National Laboratory
##     See COPYRIGHT in top-level directory
##

use strict;

# (Tested with -w; 10/5/04)
# 
# Find the parse.sub routine.
my $maintdir = "./maint";
my $rootdir  = ".";
if ( ! -s "maint/parse.sub" ) {
    my $program = $0;
    $program =~ s/extracterrmsgs//;
    if (-s "$program/parse.sub") {
	$maintdir = $program;
	$rootdir  = $program;
	$rootdir  =~ s/\/maint//g;
    }
}
require "$maintdir/parse.sub";

my $debug = 0;
my $careful = 0;        # Set careful to 1 to flag unused messages
my $carefulFilename = "";
my $showfiles = 0;
my $quiet = 0;
my $build_test_pgm = 1;
# FIXME: checkErrClass should be set to 1; currently set to zero
# to permit autogen.sh to complete
my $checkErrClass = 1;

# Strict is used to control checking of error message strings.
my $gStrict = 0;
if (defined($ENV{"DEBUG_STRICT"})) { $gStrict = 1; }

our (%generic_msgs, %generic_loc, %specific_msgs, %specific_loc);
# Hard code usages that are generated by scripts
foreach my $k ("envvarparse", "cvar_val"){
    $generic_msgs{"**$k"}=1;
    $specific_msgs{"**$k"}=1;
    $generic_loc{"**$k"}=":src/util/mpir_cvars.c";
    $specific_loc{"**$k"}=":src/util/mpir_cvars.c";
}

# Check for special args
my $mpi_h = "src/include/mpi.h.in";
my $baseerrnames_txt = "src/mpi/errhan/baseerrnames.txt";
my @files = ();
my %skipFiles = ();
my @errnameFiles = ();
my $outfile = "";
foreach my $arg (@ARGV) {
    if ($arg =~ /^--?showfiles/) { $showfiles = 1; }
    elsif( $arg =~ /^--?debug/) { $debug = 1; }
    elsif( $arg =~ /^--?quiet/) { $quiet = 1; }
    elsif( $arg =~ /^--?notest/) { $build_test_pgm = 0; }
    elsif( $arg =~ /^--?outfile=(.*)/) { $outfile = $1; }
    elsif( $arg =~ /^--?careful=(.*)/) {
	$careful = 1;
	$carefulFilename = $1;
    }
    elsif( $arg =~ /^--?careful/) { $careful = 1; }
    elsif( $arg =~ /^--?strict/)  { $gStrict = 1; }
    elsif( $arg =~ /^--?skip=(.*)/) { $skipFiles{$1} = 1; }
    else {
	print "Adding $arg to files\n" if $debug;
	if (-d $arg) {
	    # Add all .c files from directory $arg to the list of files 
	    # to process (this lets us shorten the arg list)
	    ExpandDir(\@files, $arg);
	}
	else {
            # errname files are treated differently
            if ($arg =~ m{(^|[/\\])errnames.txt$}) {
                push @errnameFiles, $arg;
            }
            else {
                $files[$#files+1] = $arg;
            }
	}
    }
}
# End of argument processing

print "Rootdir = $rootdir\n" if $debug;

# Setup the basic file for errnames - Now determined in ExpandDirs
#@errnameFiles = ( "$rootdir/src/mpi/errhan/errnames.txt" );

my $OUTFD;
if ($outfile ne "") {
    open( $OUTFD, ">$outfile" ) or die "Could not open $outfile\n";
}
else {
    $OUTFD = *STDOUT;
}
# Setup before processing the files
if ($build_test_pgm && -d "test/mpi/errhan") {
    # Get current directory in case we need it for the error message
    my $curdir = `pwd`;
    open( TESTFD, ">test/mpi/errhan/errcode.c" ) or die "Cannot create test program errcode.c in $curdir/test/mpi/errhan\n";
    print TESTFD "/*\
 * Copyright (C) by Argonne National Laboratory\
 *     See COPYRIGHT in top-level directory\
 *\
 * This file is automatically generated by maint/extracterrmsgs\
 * DO NOT EDIT\
 */\n";
    print TESTFD "#include <stdio.h>\n#include <stdlib.h>\n#include \"mpi.h\"\n";
    print TESTFD "#define MPIR_ERR_FATAL 1\n";
    print TESTFD "#define MPIR_ERR_RECOVERABLE 0\n";
    print TESTFD "int MPIR_Err_create_code( int, int, char *, int, int, const char [], const char [], ... );\n";
    print TESTFD "void ChkMsg( int, int, const char [] );\n\n";
    print TESTFD "int main(int argc, char **argv)\n";
    print TESTFD "{\n    int err;\n    MPI_Init( 0, 0 );\n";
}

# Load mpi.h for error class constants
my %mpi_h_constants;
$mpi_h_constants{"MPI_SUCCESS"} = 0;
if (open In, $mpi_h) {
    while (<In>) {
        if (/^#define\s+((MPICH|MPI|MPI_T|MPIX)_ERR_\w+)\s+(.+)/) {
            my ($key, $t) = ($1, $3);
            if ($t=~/^(\d+)/) {
                $mpi_h_constants{$key} = $1;
            } elsif ($t=~/MPICH_ERR_FIRST_MPIX\s*\+\s*(\d+)/) {
                $mpi_h_constants{$key} = $mpi_h_constants{MPICH_ERR_FIRST_MPIX} + $1;
            }
        }
    }
    close In;
} else {
    die "Unable to read $mpi_h\n";
}

my $max_err_class = $mpi_h_constants{MPICH_ERR_LAST_MPIX};
if (!$max_err_class) {
    die "Failed to load MPICH_ERR_LAST_MPIX from $mpi_h\n";
}

# Process the definitions
my (%generic_msgs, %generic_loc, %specific_msgs, %specific_loc);
foreach my $file (@files) {
    print "$file\n" if $showfiles;
    &ProcessFile( $file );
}

#
# Create the hash %longnames that maps the short names to the long names,
# $longnames{shortname} => longname, by reading the errnames.txt files
#
my (%longnames, %longnamesDefined);
foreach my $sourcefile (@errnameFiles) {
    #print STDERR "processing $sourcefile for error names\n";
    &ReadErrnamesFile( $sourcefile );
}

# Load baseerrnames.txt
my @class_msgs;
if (open In, $baseerrnames_txt) {
    while (<In>) {
        if (/^(MPI\w+)\s+(\*\*\w+)/) {
            my ($name, $shortmsg) = ($1, $2, $3);
            my $id = $mpi_h_constants{$name};
            if (defined $id) {
                $generic_msgs{$shortmsg}++;
                $generic_loc{$shortmsg} = ":baseerrnames.txt";
                $class_msgs[$id] = $shortmsg;
            } else {
                die "error class $name not found in mpi.h\n";
            }
        }
    }
    close In;
} else {
    die "Unable to read $baseerrnames_txt\n";
}


# Create the output files from the input that we've read
my (%longnamesUsed, %short_to_num);
&CreateErrmsgsHeader( $OUTFD );
&CreateErrMsgMapping( $OUTFD );

if ($build_test_pgm && -d "test/mpi/errhan") {
    print TESTFD "    MPI_Finalize();\n    return 0;\n}\n";
    close TESTFD;
}    

#
# Generate a list of unused keys
if ($careful) {
    my $OUTFD = *STDERR;
    if ($carefulFilename ne "") {
	open $OUTFD, ">$carefulFilename" or die "Cannot open $carefulFilename";
    }
    foreach my $shortname (keys(%longnames)) {
	if (!defined($longnamesUsed{$shortname}) ||
	    $longnamesUsed{$shortname} < 1) {
	    my $loc = $longnamesDefined{$shortname};
	    print $OUTFD "Name $shortname is defined in $loc but never used\n";
	}
    }
    if ($carefulFilename ne "") {
	close $OUTFD;
    }
}

#-----------------------------------------------------------------------------
# ROUTINES
# ----------------------------------------------------------------------------
# From the data collected above, generate the file containing the error message
# text.
# This is a temporary routine; the exact output form will be defined later
sub CreateErrmsgsHeader {
    my $FD = $_[0];
    print $FD "/*\
 * Copyright (C) by Argonne National Laboratory\
 *     See COPYRIGHT in top-level directory\
 *\
 * This file automatically created by extracterrmsgs\
 * DO NOT EDIT\
 */\n";
    print $FD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS
typedef struct msgpair {
        const unsigned int sentinal1;
        const char *short_name, *long_name; 
        const unsigned int sentinal2; } msgpair;
#endif\n"
}
#
# We also need a way to create the records
# We then hash these on the first occurrence (or precompute the hashes?)
#
# The error messages are output in the following form:
# typedef struct {const char short[], const long[]} namemap;
# Generic messages
# static const char[] short1 = "";
# static const char[] long1 = "";
# ...
# static const namemap[] = { {short1, long1}, {...} }
#
sub CreateErrMsgMapping {
    my $OUTFD = $_[0];

    # For the case of classes only, output the strings for the class 
    # messages
    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL == MPICH_ERROR_MSG__CLASS\n";
    print $OUTFD "static const char *classToMsg[] = {\n";
    for (my $i=0; $i<=$max_err_class; $i++) {
	my $shortname = $class_msgs[$i];
        my $msg       = $longnames{$shortname};
        print $OUTFD "    \"$msg\", /* $i  $class_msgs[$i] */\n";
    }
    print $OUTFD "    NULL\n};\n";
    print $OUTFD "#endif /* MSG_CLASS */\n";

    # Now, output each short,long key
    # Do the generic, followed by the specific, messages
    # The long messages must be available for the generic message output.
    # An alternative is to separate the short from the long messages;
    # the long messages are needed for > MSG_NONE, the short for > MSG_CLASS.
    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS\n";
    print $OUTFD "/* The names are in sorted order, allowing the use of a simple\
  linear search or bisection algorithm to find the message corresponding to\
  a particular message */\n";
    my @sorted_generic_msgs = sort keys %generic_msgs;

    # add a dummy UNKNOWN entry in the front.
    # NOTE: assume all other generic message are lowercase so "UNKNOWN" will
    #       be ordered first. This is critical because FindGenericMsgIndex assumes
    #       this ordering!
    unshift @sorted_generic_msgs, "**UNKNOWN";
    $generic_loc{"**UNKNOWN"} = ":[NONE]";
    $longnames{"**UNKNOWN"} = "Unknown error class";

    my $num = 0;
    foreach my $key (@sorted_generic_msgs)
    {
	my $longvalue = "\"\0\"";
	if (!defined($longnames{$key}))
	{
	    my $seenfile = $generic_loc{$key};
	    if ($key =~ /^\*\*/) {
		# If the message begins with text, assume that it is a 
		# literal message
		print STDERR "Shortname $key for generic messages has no expansion (first seen in file $seenfile)\n";
	    }
	    next;
	}
	else {
	    # Keep track of which messages we have seen
	    $longnamesUsed{$key} += 1;
	}
	
	# Escape any naked quotes (This should be applied somewhere else?)
#	$longvalue = s/(?<!\\)\"/\\\"/;

	$longvalue = "\"" . $longnames{$key} . "\"";

	print $OUTFD "static const char short_gen$num\[\] = \"$key\";\n";
#	print $OUTFD "static const char short_gen$num\[\] = $key;\n";
	print $OUTFD "static const char long_gen$num\[\]  = $longvalue;\n";
	# Remember the number assigned to this short string.
	$short_to_num{$key} = $num;
	$num ++;
    }
    # Generate the mapping of short to long names
    print $OUTFD "\nstatic const int generic_msgs_len = $num;\n";

    # The sentinels should be hardcoded into the source file that
    # uses this file to ensure that the sentinel tests are ok.
    my $sentinal1 = "0xacebad03";
    my $sentinal2 = "0xcb0bfa11";
    print $OUTFD "static const msgpair generic_err_msgs[] = {\n";
    for (my $i = 0; $i < $num; $i ++) {
	print $OUTFD "    { $sentinal1, short_gen$i, long_gen$i, $sentinal2 }";
	print $OUTFD "," if ($i < $num - 1);
	print $OUTFD "\n";
    }
    print $OUTFD "};\n";
    print $OUTFD "#endif\n\n";

    $num = 0;
    # Now output the instance specific messages
    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__GENERIC\n";
    foreach my $key (sort keys %specific_msgs)
    {
	my $longvalue = "\"\0\"";

	if (!defined($longnames{$key}))
	{
	    print STDERR "Shortname $key for specific messages has no expansion (first seen in file $specific_loc{$key})\n";
	    next;
	}
	else {
	    # Keep track of which messages we have seen
	    $longnamesUsed{$key} += 1;
	}

	# Escape any naked quotes
	$longvalue =~ s/(?<!\\)\"/\\\"/;
	$longvalue = "\"" . $longnames{$key} . "\"";

	print $OUTFD "static const char short_spc$num\[\] = \"$key\";\n";
#	print $OUTFD "static const char short_spc$num\[\] = $key;\n";
	print $OUTFD "static const char long_spc$num\[\]  = $longvalue;\n";
	$num ++;
    }
    # Generate the mapping of short to long names

    print $OUTFD "\nstatic const int specific_msgs_len = $num;\n";
    print $OUTFD "static const msgpair specific_err_msgs[] = {\n";
    for (my $i = 0; $i < $num ; $i ++) {
	print $OUTFD "    { $sentinal1, short_spc$i, long_spc$i, $sentinal2 }";
	print $OUTFD "," if ($i < $num - 1);
	print $OUTFD "\n";
    }
    print $OUTFD "};\n";
    print $OUTFD "#endif\n\n";

    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG__CLASS\n";
    my $maxval = $max_err_class + 1;
    print $OUTFD "static int class_to_index[] = {\n    ";
    for (my $i=0; $i<=$max_err_class; $i++) {
        my $idx = $short_to_num{$class_msgs[$i]};
        if (!$idx) {
            # 0 is the "**UNKNOWN" entry
            $idx = 0;
        }
        print $OUTFD "$idx";
	print $OUTFD "," if ($i < $max_err_class);
	print $OUTFD "\n    " if !(($i + 1) % 10);
    }
    print $OUTFD "\n};\n";
    print $OUTFD "#endif\n";
}
#
# Add a call to test this message for the error message.
# Handle both the generic and specific messages
#
my (%test_generic_msg, %test_specific_msg);
sub AddTestCall {

    my ($filename, $genericArgLoc, @msg_args) = @_;
    
    my $last_errcode = "MPI_SUCCESS";  # $_[0];
    my $fatal_flag   = "MPIR_ERR_RECOVERABLE"; # $_[1];
    my $fcname       = "unknown"; # $_[2];
    my $linenum      = "__LINE__"; # $_[3];
    my $errclass     = "MPI_ERR_OTHER"; # $_[4];

    my $generic_msg = $msg_args[$genericArgLoc];
    my $specific_msg = $msg_args[$genericArgLoc+1];
    if (!defined $specific_msg) {
        $specific_msg = "0";
    }

    # Ensure that the last_errcode, class and fatal flag are specified.  There are a few places where these are variables.
    if (!($last_errcode =~ /MPI_ERR_/) )
    {
	$last_errcode = "MPI_SUCCESS";
    }
    if (!($errclass =~ /MPI_ERR_/) )
    {
	$errclass = "MPI_ERR_OTHER";
    }
    if (!($fatal_flag =~ /MPIR_ERR_FATAL/) && !($fatal_flag =~ /MPIR_ERR_RECOVERABLE/))
    {
	$fatal_flag = "MPIR_ERR_FATAL";
    }

    # Generic message (first instance only)
    if (!defined($test_generic_msg{$generic_msg}))
    {
	$test_generic_msg{$generic_msg} = $filename;

	print TESTFD "    /* $filename */\n";
	print TESTFD "    err = MPIR_Err_create_code($last_errcode, $fatal_flag, \"errcode::main\", __LINE__, $errclass, ". 
	    "$generic_msg, 0);\n";
	print TESTFD "    ChkMsg( err, $errclass, $generic_msg );\n";
    }

    # Specific messages
    $specific_msg = "0" if ($specific_msg =~ /^\s*NULL\s*$/);
    if ($specific_msg ne "0" && !defined($test_specific_msg{$specific_msg}))
    {
	$test_specific_msg{$specific_msg} = $filename;

	print TESTFD "    {\n";
	print TESTFD "    /* $filename */\n";
	# Use types in the string to create the types with default
	# values
	my $format = $specific_msg;
	my $fullformat = $format;
	my $narg = 0;
	my @args = ();
	while ($format =~ /[^%]*%(.)(.*)/)
	{
	    my $type = $1; 
	    $format  = $2;
	    $narg ++;
	    if ($type eq "d")
	    {
		print TESTFD "    int i$narg = $narg;\n";
		$args[$#args+1] = "i$narg";
	    }
            elsif ($type eq "x")
            {
                print TESTFD "    int i$narg = $narg;\n";
                $args[$#args+1] = "i$narg";
            }
	    elsif ($type eq "L") 
	    {
		print TESTFD "    long long i$narg = $narg;\n";
		$args[$#args+1] = "i$narg";
	    }
            elsif ($type eq "X")
            {
                print TESTFD "    long long i$narg = $narg;\n";
                $args[$#args+1] = "i$narg";
            }
	    elsif ($type eq "i")
	    {
		print TESTFD "    int i$narg = $narg;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "t")
	    {
		print TESTFD "    int i$narg = $narg;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "s")
	    {
		print TESTFD "    char s$narg\[\] = \"string$narg\";\n";
		$args[$#args+1] = "s$narg";
	    }
	    elsif ($type eq "p")
	    {
		print TESTFD "    char s$narg\[\] = \"pointer$narg\";\n";
		$args[$#args+1] = "s$narg";
	    }
	    elsif ($type eq "C")
	    {
		print TESTFD "    int i$narg = MPI_COMM_WORLD;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "I")
	    {
		print TESTFD "    int i$narg = MPI_INFO_NULL;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "D")
	    {
		print TESTFD "    int i$narg = MPI_INT;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "F")
	    {
		# This must be an MPI_File since that type may not
		# be an integer (it is a pointer at this time)
		print TESTFD "    MPI_File i$narg = MPI_FILE_NULL;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "W")
	    {
		print TESTFD "    int i$narg = MPI_WIN_NULL;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "A")
	    {
		print TESTFD "    int i$narg = $narg;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "G")
	    {
		print TESTFD "    int i$narg = MPI_GROUP_NULL;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "O")
	    {
		print TESTFD "    int i$narg = MPI_SUM;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "R")
	    {
		print TESTFD "    int i$narg = MPI_REQUEST_NULL;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "E")
	    {
		print TESTFD "    int i$narg = MPI_ERRORS_RETURN;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "K")
	    {
		print TESTFD "    int i$narg = MPI_KEYVAL_INVALID;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "S")
	    {
		print TESTFD "    int i$narg = MPI_SESSION_NULL;\n";
		$args[$#args+1] = "i$narg";
	    }
            elsif ($type eq "c")
            {
                print TESTFD "    MPI_Count i$narg = $narg;\n";
                $args[$#args+1] = "i$narg";
            }
	    else
	    {
		print STDERR "Unrecognized format type $type for $fullformat in $filename\n";
	    }
	}   
	my $actargs = $#msg_args - $genericArgLoc - 1;
	if ($actargs != $narg)
	{
	    print STDERR "Error: Format $fullformat provides $narg arguments but call has $actargs in $filename\n";
	}
	print TESTFD "     err = MPIR_Err_create_code($last_errcode, $fatal_flag, \"errcode::main\", __LINE__, $errclass, " .
	    "$generic_msg, $specific_msg";
	foreach my $arg (@args) 
	{
	    print TESTFD ", $arg";
	}
	print TESTFD " );\n";
	print TESTFD "    ChkMsg( err, $errclass, $specific_msg );\n    }\n";
	# ToDo: pass another string to ChkMsg that contains the 
	# names of the variables, as a single string (comma separated).
	# This allows us to review the source of the values for the args.
    }
}

# ==========================================================================
# Read an errnames file.  This allows us to distribute the errnames.txt
# files in the relevant modules, rather than making them part of one
# single main file.
# This updates the global hashes longnames and longnamesDefined
#  ReadErrnamesFile( filename )
# ==========================================================================
sub ReadErrnamesFile {
    my $sourcefile = $_[0];

    open( FD, "<$sourcefile" ) or return 0;
    my $linecount = 0;
    while (<FD>) {
	$linecount++;
	# Skip Comments
	if (/^\s*\#/) { next; }
	# Read entire error message (allow \ at end of line to continue)
	if (/^\s*(\*\*[^:]*):(.*)$/) {
	    my $name = $1;
	    my $repl = $2;
	    $repl =~ s/\r*\n*$//g;
	    while ($repl =~ /\\\s*$/) {
		# If there is a \\ at the end, read another.  
		# Remove the \ at the end (an alternative is to turn
		# it into a \n (newline), but we may want to avoid 
		# multiline messages
		$repl =~ s/\\\s*$//;
		my $inline = <FD>;
		$linecount++;
		$inline =~ s/^\s*//;   # remove leading spaces
		$repl .= $inline;
		$repl =~ s/[\r\n]*$//g; # remove newlines
	    }

	    # Check that the name and the replacement text at least
	    # partially match as to format specifiers
	    # (They should have exactly the same pattern, i.e., 
	    # if the name has %d %x in is, the replacement should 
	    # have %d %x, in that order)
	    my $namehasformat = ($name =~ /%/);
	    my $replhasformat = ($repl =~ /%/);
	    if ($namehasformat != $replhasformat) {
		print STDERR "Warning: format control usage in $name and $repl do not agree in $sourcefile\n";
	    }
#	    if (!defined($longnames{"\"$name\""}))
#	    {
#		$longnames{"\"$name\""} = $repl;
#		$longnamesDefined{"\"$name\""} = "$sourcefile:$linecount";
#	    }
	    # Check that the replacement text doesn't include a unquoted
	    # double quote
	    if ($repl =~ /(.)\"/) {
		my $prechar = $1;
		if ($1 ne "\\") {
		    print STDERR "Warning: Replacement text for $name contains an unescaped double quote: $repl\n";
		}
	    }
	    if (!defined($longnames{$name}))
	    {
		$longnames{$name} = $repl;
		$longnamesDefined{$name} = "$sourcefile:$linecount";
	    }
	    else
	    {
		print STDERR "Warning: attempt to redefine $name.  Duplicate ignored.\n";
		print STDERR "Previously defined at $longnamesDefined{$name} with value \"$longnames{$name}\"\n";
		print STDERR "Redefined at $sourcefile:$linecount with value \"$repl\"\n";
	    }
	}
    }
    close( FD );
}

# ==========================================================================
# Call this for each file
# This reads a C source or header file and adds does the following:
#   adds any generic message short names encountered to the hash generic_msgs.
#   adds any specific message short names encounter to the hash specific_msgs.
#   adds the filename to the hash generic_loc{msg} as the value (: separated)
#       and the same for hash specific_loc{msg}.
#   The last two are used to provide better error reporting.
#
my %KnownErrRoutines;
my %bad_syntax_in_file;

sub ProcessFile
{ 
    if (!%KnownErrRoutines) {
        load_KnownErrRoutines();
    }

    my $filename = $_[0];
    my $linecount = 0;
    my $remainder;
    open (my $FD, "<$filename" ) or die "Could not open $filename\n";

    while (<$FD>) {
	$linecount++;
	# Skip code that is marked as ignore (e.g., for
	# macros that are used to simplify the use of MPIR_Err_create_code
	# (such macros must also be recognized and processed)
	if (/\/\*\s+--BEGIN ERROR MACROS--\s+\*\//) {
	    while (<$FD>) {
		$linecount++;
		if (/\/\*\s+--END ERROR MACROS--\s+\*\//) { last; }
	    }
	    $remainder = "";
	    next;
	}
	# Next, remove any comments
	$_ = StripComments($FD, $_ );
	# Skip the definition of the function
	if (/int\s+MPI[OUR]_Err_create_code/) { $remainder = ""; next; }
	while (/(MPI[OUR]_Err[A-Za-z0-9_]+)\s*(\(.*)$/i) {
	    my $routineName = $1;
	    my $arglist     = $2;
            if ($routineName =~ /MPIR_ERR_(CHECK|POP|ADD|GET_CLASS|COLL_CHECKANDCONT|is_fatal)/i) {
                # skip known false positives
                next;
            }
	    if (!defined($KnownErrRoutines{$routineName})) {
		print "Skipping $routineName\n" if $debug;
		last;
	    }
	    print "Found $routineName\n" if $debug;
	    my ($genericArgLoc,$hasLine,$hasSpecific,$onlyIndirect,$errClassLoc) = 
		split(/:/,$KnownErrRoutines{$routineName});

            my ($leader, @args);
	    ($leader, $remainder, @args ) = &GetSubArgs($FD, $arglist );
	    # Discard leader 
	    if ($debug) {
		print "Line begins with $leader\n";   # Use $leader to keep -w happy
		foreach my $arg (@args) {
		    print "|$arg|\n";
		}
	    }
	    # Process the signature
	    
	    # if signature does not match new function prototype, then skip it
	    if ($#args < $genericArgLoc) {
		if (!defined($bad_syntax_in_file{$filename})) {
		    $bad_syntax_in_file{$filename} = 1;
		    print STDERR "Warning: $routineName call with too few arguments in $filename\n";
		}
		next;
	    }
	    if ($hasLine >= 0 && 
		($args[$hasLine] ne "__LINE__" && $args[$hasLine] ne "line")) {
		if (!defined($bad_syntax_in_file{$filename})) {
		    $bad_syntax_in_file{$filename} = 1;
		    my $tmpi = $hasLine + 1;
		    print STDERR "Warning: Expected __LINE__ or line as ${tmpi}th argument of $routineName in $filename\n";
		}
		next;
	    }
	    if ($errClassLoc >= 0 && $checkErrClass) {
		if (!($args[$errClassLoc] =~ /^MPI_ERR_/)  &&
		    !($args[$errClassLoc] =~ /^MPI_T_ERR_/) &&
		    !($args[$errClassLoc] =~ /^MPIDI_CH3I_SOCK_ERR_/) &&
		    !($args[$errClassLoc] =~ /^MPIX_ERR_/) &&
		    !($args[$errClassLoc] =~ /^errclass/) &&
                    !($args[$errClassLoc] =~ /^errflag/) &&
                    !($args[$errClassLoc] =~ /^\*errflag/)) {
		    $bad_syntax_in_file{$filename} = 1;
		    print STDERR "Invalid argument $args[$errClassLoc] for the MPI Error class in $routineName in $filename\n";
		    next;
		}
	    }
	    
	    #my $last_errcode = $args[0];
	    #my $fatal_flag = $args[1];
	    #my $fcname = $args[2];
	    #my $linenum = $args[3];
	    #my $errclass = $args[4];
	    my $generic_msg = $args[$genericArgLoc];
	    my $specific_msg = "0";
	    if ($hasSpecific) {
		$specific_msg = $args[$genericArgLoc+1];
	    }

	    # Check the generic and specific message arguments
	    if ($generic_msg =~ /\s$/)
	    {
		print STDERR "Warning: trailing blank on arg $generic_msg in $filename!\n"; 
	    }
	    if ($onlyIndirect && !($generic_msg =~ /^\"\*\*\S+\"$/)) {

		print STDERR "Error: generic message $generic_msg has incorrect format in $filename\n";
		next;
	    }
	    if ($generic_msg =~ /%/) {
		print STDERR "Warning: generic message $generic_msg in $filename contains a format control\n";
	    }
		 
	    $specific_msg = "0" if ($specific_msg =~ /^\s*NULL\s*$/);
	    if ($specific_msg =~ /^[1-9]/)
	    {
		print STDERR "Error: instance specific message $specific_msg in $filename is an invalid integer ". 
		    "(must be 0 or a string)\n";
		next;
	    }
	    if ($specific_msg eq $generic_msg)
	    {
		print STDERR "Warning: generic and instance specific messages must be different " .
		    "(file $filename, message $generic_msg)\n";
	    }
	    if ($specific_msg ne "0" && !($specific_msg =~ /\%/))
	    {
		print STDERR "Warning: instance specific message $specific_msg in $filename contains no format control\n";
	    }
	    if ($specific_msg =~ /%/) {
		# Specific message includes format values.  Check
		# for number and for valid strings if %s
		my $nargs = 0;
		my $tmpmsg = $specific_msg;
		my @stringLocs = ();
		while ($tmpmsg =~ /[^%]*%(.)(.*)/) {
		    $tmpmsg = $2;
		    my $followchar = $1;
		    if ($followchar eq "s") {
			$stringLocs[$#stringLocs+1] = $nargs;
		    }
		    if ($followchar ne "%") {
			$nargs ++;
		    }
                    if (! ($followchar =~ /[%sdxitpcDCRWOEIGFALXKS]/) ) {
			print STDERR "Warning: Unrecognized format specifier in specific message $specific_msg in $filename\n";
		    }
		}
		if ($nargs != $#args - $genericArgLoc - 1) {
		    my $actargs = $#args - $genericArgLoc - 1;
		    print STDERR "Warning: wrong number of arguments for instance specific message $specific_msg in $filename; expected $nargs but found $actargs\n";
		}
		elsif ($#stringLocs >= 0 && $gStrict) {
		    # Check for reasonable strings if strict checking requested
		    for (my $i=0; $i<=$#stringLocs; $i++) {
			my $index = $stringLocs[$i];
			my $string = $args[$genericArgLoc+2+$index];
			if ($string =~ /\"/) {
			    # Allow a few special cases:
			    # Always: all uppercase and _, single word
			    my $stringOk = 0;
			    if ($string =~ /^\"[A-Z_]*\"$/) {
				$stringOk = 1;
			    }
			    elsif ($string =~ /^\"\w*\"$/) {
				if (1) { $stringOk = 1; }
			    }
			    if (!$stringOk) {
				print STDERR "Warning: explicit string as argument to specific message $specific_msg in $filename; explicit string is $string\n";
			    }
			}
		    }
		}
	    }

	    if ($build_test_pgm) {
		&AddTestCall($filename, $genericArgLoc, @args )
	    }

	    if ($generic_msg =~ /^\"(.*)\"$/) {
		$generic_msg = $1;
		$generic_msgs{$generic_msg}++;
		$generic_loc{$generic_msg} .= ":$filename";
	    }
	    else {
		$generic_msgs{$generic_msg}++;
		$generic_loc{$generic_msg} .= ":$filename";
	    }

	    if ($specific_msg =~ /^\"(\*\*.*)\"/)
	    {
		$specific_msg = $1;
		$specific_msgs{$specific_msg}++;
		$specific_loc{$specific_msg} .= ":$filename";
	    }
	}
	continue
        {
            $_ = $remainder;
        }
    }		
    close $FD;
}

# Get all of the .c files from the named directory, including any subdirs
# Also, add any errnames.txt files to the errnamesFiles arrays
sub ExpandDir {
    my ($files, $dir) = @_;
    my @otherdirs = ();
    opendir DIR, "$dir";
    while (my $filename = readdir DIR) {
	if ($filename =~ /^\./) {
	    next;
	}
	elsif (-d "$dir/$filename") {
	    $otherdirs[$#otherdirs+1] = "$dir/$filename";
	}
	elsif ($filename =~ /(.*\.[chi])(pp){0,1}$/) {
	    # Test for both Unix- and Windows-style directory separators
	    if (!defined($skipFiles{"$dir/$filename"}) &&
		!defined($skipFiles{"$dir\\$filename"})) {
                push @$files, "$dir/$filename";
	    }
	}
	elsif ($filename eq "errnames.txt") {
	    $errnameFiles[$#errnameFiles+1] = "$dir/$filename";
	}
    }
    closedir DIR;
    # (almost) tail recurse on otherdirs (we've closed the directory handle,
    # so we don't need to worry about it anymore)
    foreach my $dir (@otherdirs) {
        ExpandDir($files, $dir);
    }
}

sub load_KnownErrRoutines {
    # Match the known routines and macros.
    # Then check that the arguments match if there is a
    # specific string (number of args matches the number present)
    # MPIR_ERR_CHK(FATAL)?ANDJUMP[1-4]?(cond,code,class,gmsg[,smsg,args])
    # MPIR_ERR_SET(FATAL)?ANDJUMP[1-4]?(code,class,gmsg[,smsg,args])
    # MPIR_ERR_CHK(FATAL)?ANDSTMT[1-4]?(cond,code,class,stmt,gmsg[,smsg,args])
    # MPIR_ERR_SET(FATAL)?ANDSTMT[1-4]?(code,class,stmt,gmsg[,smsg,args])
    # Value is a tuple of:
    #  the count of args where the generic msg begins (starting from 0)
    #  location of __LINE__ (-1 for none)
    #  specific msg arg required (0 for no, > 0 for yes)
    #  is the generic message an indirect from errnames.txt (1=yes 0=no)
    #  location of the error class
    %KnownErrRoutines = (
        'MPIR_Err_create_code'      => '5:3:1:1:4',
        'MPIO_Err_create_code'      => '5:3:1:0:-1',
        'MPIR_ERR_SET'              => '2:-1:0:1:1',
        'MPIR_ERR_SETSIMPLE'        => '2:-1:0:1:1',
        'MPIR_ERR_SET1'             => '2:-1:1:1:1',
        'MPIR_ERR_SET2'             => '2:-1:2:1:1',
        'MPIR_ERR_SETANDSTMT'       => '3:-1:0:1:1',
        'MPIR_ERR_SETANDSTMT1'      => '3:-1:1:1:1',
        'MPIR_ERR_SETANDSTMT2'      => '3:-1:1:1:1',
        'MPIR_ERR_SETANDSTMT3'      => '3:-1:1:1:1',
        'MPIR_ERR_SETANDSTMT4'      => '3:-1:1:1:1',
        'MPIR_ERR_SETANDJUMP'       => '2:-1:0:1:1',
        'MPIR_ERR_SETANDJUMP1'      => '2:-1:1:1:1',
        'MPIR_ERR_SETANDJUMP2'      => '2:-1:1:1:1',
        'MPIR_ERR_SETANDJUMP3'      => '2:-1:1:1:1',
        'MPIR_ERR_SETANDJUMP4'      => '2:-1:1:1:1',
        'MPIR_ERR_CHKANDSTMT'       => '4:-1:0:1:2',
        'MPIR_ERR_CHKANDSTMT1'      => '4:-1:1:1:2',
        'MPIR_ERR_CHKANDSTMT2'      => '4:-1:1:1:2',
        'MPIR_ERR_CHKANDSTMT3'      => '4:-1:1:1:2',
        'MPIR_ERR_CHKANDSTMT4'      => '4:-1:1:1:2',
        'MPIR_ERR_CHKANDJUMP'       => '3:-1:0:1:2',
        'MPIR_ERR_CHKANDJUMP1'      => '3:-1:1:1:2',
        'MPIR_ERR_CHKANDJUMP2'      => '3:-1:1:1:2',
        'MPIR_ERR_CHKANDJUMP3'      => '3:-1:1:1:2',
        'MPIR_ERR_CHKANDJUMP4'      => '3:-1:1:1:2',
        'MPIR_ERR_SETFATAL'         => '2:-1:0:1:1',
        'MPIR_ERR_SETFATALSIMPLE'   => '2:-1:0:1:1',
        'MPIR_ERR_SETFATAL1'        => '2:-1:1:1:1',
        'MPIR_ERR_SETFATAL2'        => '2:-1:2:1:1',
        'MPIR_ERR_SETFATALANDSTMT'  => '3:-1:0:1:1',
        'MPIR_ERR_SETFATALANDSTMT1' => '3:-1:1:1:1',
        'MPIR_ERR_SETFATALANDSTMT2' => '3:-1:1:1:1',
        'MPIR_ERR_SETFATALANDSTMT3' => '3:-1:1:1:1',
        'MPIR_ERR_SETFATALANDSTMT4' => '3:-1:1:1:1',
        'MPIR_ERR_SETFATALANDJUMP'  => '2:-1:0:1:1',
        'MPIR_ERR_SETFATALANDJUMP1' => '2:-1:1:1:1',
        'MPIR_ERR_SETFATALANDJUMP2' => '2:-1:1:1:1',
        'MPIR_ERR_SETFATALANDJUMP3' => '2:-1:1:1:1',
        'MPIR_ERR_SETFATALANDJUMP4' => '2:-1:1:1:1',
        'MPIR_ERR_CHKFATALANDSTMT'  => '4:-1:0:1:2',
        'MPIR_ERR_CHKFATALANDSTMT1' => '4:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDSTMT2' => '4:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDSTMT3' => '4:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDSTMT4' => '4:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDJUMP'  => '3:-1:0:1:2',
        'MPIR_ERR_CHKFATALANDJUMP1' => '3:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDJUMP2' => '3:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDJUMP3' => '3:-1:1:1:2',
        'MPIR_ERR_CHKFATALANDJUMP4' => '3:-1:1:1:2',
        'MPIR_ERRTEST_VALID_HANDLE' => '4:-1:0:1:3',
    );
}

#
# Other todos:
# It would be good to keep track of any .N MPI_ERR_xxx names in the structured
# comment and match these against any MPI_ERR_yyy used in the code, emitting a
# warning message for MPI_ERR_yyy values used in the code but not mentioned 
# in the header.  This could even apply to routines that are not at the MPI
# layer, forcing all routines to document all MPI error classes that they might
# return (this is like requiring routines to document the exceptions that 
# they may throw).
