#! /bin/sh
# PCP QA Test No. 169
#
# Test pmcd timeouts with the pmcd PMDA's pmcd.control.timeout
#
# Test setting the app's timeouts < pmcd's timeout and vice versa
#
# Copyright (c) 1995-2002 Silicon Graphics, Inc.  All Rights Reserved.
#

# name of this test, taken from the script's own file name
seq=$(basename $0)
echo "QA output created by $seq"

# get standard environment, filters and checks
. ./common.product
. ./common.check
. ./common.filter

# banner: blank line, test title, blank line
cat <<'End-of-File'

Test pmcd timeouts with the pmcd PMDA's pmcd.control.timeout

End-of-File

LOCALHOST=$(hostname)
# pmsignal is used further down to send SIGHUP to pmcd
signal=$PCP_BINADM_DIR/pmsignal

# Filter (stdin to stdout): on each line, replace the first run of four
# or more consecutive dots with the literal token "[dots]".
_filter_dots()
{
    sed -e 's/\.\{4,\}/[dots]/'
}

# Exit handler, run from the trap installed by this script.
# Stops pmlogger and pmcd, saves the pmcd and dumb_pmda logs in $seq_full,
# restores the two saved configuration files, restarts pmcd and pmlogger,
# puts pmie back into the state recorded in $pmie_was_running and finally
# removes the $tmp.* scratch files.
#
# NOTE(review): the trap is installed before pmie_was_running is first
# assigned; if this handler runs in that window the variable expands to
# nothing, the "if" below succeeds and pmie is started -- confirm that
# this is acceptable.
_cleanup()
{
    # orderly shutdown: pmlogger first, then pmcd; bail out on failure
    _service pmlogger stop >$tmp.stop 2>&1 || _exit 1
    _wait_pmlogger_end || _exit 1
    _service pmcd stop >>$tmp.stop 2>&1 || _exit 1
    _wait_pmcd_end || _exit 1
    # primary pmlogger exiting during the test is racey depending on timing
    # of fetches and stopping/starting pmcd (this is also not under test).
    _filter_pcp_stop <$tmp.stop | _filter_dots | sed -e '/logger not running/d'

    # keep both logs for post-mortem analysis
    {
	echo
	cat $PCP_LOG_DIR/pmcd/pmcd.log
	echo
	cat $PCP_LOG_DIR/pmcd/dumb_pmda.log
    } >>$seq_full

    # original configuration back in place before anything is restarted
    _restore_config $PCP_PMCDCONF_PATH
    _restore_config $PCP_SYSCONFIG_DIR/pmcd

    _service pmcd start >$tmp.start 2>&1
    _wait_for_pmcd
    _restore_auto_restart pmcd
    _service pmlogger start >>$tmp.start 2>&1
    _wait_for_pmlogger
    _restore_auto_restart pmlogger
    _filter_pcp_start <$tmp.start | _filter_dots

    _restore_auto_restart pmie
    if $pmie_was_running; then
	_service pmie start >>$seq_full 2>&1
    else
	# make sure no pmie is left behind: stop the service, then TERM
	# any pmie process that is still around and wait for it to go
	_service pmie stop >>$seq_full 2>&1
	$sudo $PCP_BINADM_DIR/pmsignal -a -s TERM pmie >>$seq_full 2>&1
	_wait_pmie_end
    fi

    rm -f $tmp.*
}

# Pessimistic default: only the last line of the test sets status to 0.
status=1
# Run _cleanup on normal exit (0) and on signals 1, 2, 3 and 15; $status is
# escaped so that it is expanded when the trap fires, not when it is set.
trap "_cleanup; exit \$status" 0 1 2 3 15

_stop_auto_restart pmcd
_stop_auto_restart pmlogger
_stop_auto_restart pmie

# Remember whether pmie was active (judged by its pid file) so that
# _cleanup can put it back the same way; stop it for the test if it was.
if [ -f $PCP_RUN_DIR/pmie.pid ]; then
    pmie_was_running=true
    echo "Found pmie PID $(cat $PCP_RUN_DIR/pmie.pid) running @ start" >>$seq_full
    _service pmie stop >>$seq_full 2>&1 || _exit 1
else
    pmie_was_running=false
fi

# real QA test starts here

# Marker string passed on the command line of processes this script
# creates; in this file it is appended to the dumb_pmda command written
# to the temporary pmcd.conf.
# It's unlikely to occur anywhere else in a ps output, so it can serve as
# a tag to terminate or check for the existence of those processes later.
#
TAG=000000660066

# Filter (stdin to stdout) that makes pminfo diagnostics deterministic.
# Example of a line it is meant for:
# [Sat Jul 30 17:33:55] pminfo(26317) Error: __pmCloseChannel: fd=3 context=1: Timeout waiting for a response from PMCD
# On each line the first "pminfo(<digits>)" becomes "pminfo(PID)" and the
# first "[Day Mon D HH:MM:SS]" timestamp becomes "[DATE]".
_filter()
{
    sed '
s/pminfo([0-9]\{1,\})/pminfo(PID)/
s/\[[A-Z][a-z]\{2\} [A-Z][a-z]\{2\}  *[0-9]\{1,\} [0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}]/[DATE]/
'
}

# Check how many pmcd processes are running.
#   $1  the expected number of pmcd processes
# Returns 0 when the count matches.  Otherwise prints the mismatch, the ps
# snapshot, the pmcd log and a "file" report on any core files under
# $PCP_LOG_DIR, and returns 1.
# Side effects: leaves the ps snapshot in $tmp.pmcd and the count in n_pmcd.
_check_pmcd_procs()
{
    # count ps lines naming the pmcd binary, ignoring pmcd_wait and the
    # grep commands of this very pipeline
    n_pmcd=$($PCP_PS_PROG $PCP_PS_ALL_FLAGS \
	    | tee $tmp.pmcd \
	    | grep -F "$PCP_PMCD_PATH" \
	    | grep -v pmcd_wait \
	    | grep -c -v grep)

    [ "X$n_pmcd" = "X$1" ] && return 0

    echo "Expected $1 pmcd processes but found $n_pmcd:"
    cat $tmp.pmcd
    cat $PCP_PMCDLOG_PATH
    file $PCP_LOG_DIR/core* $PCP_LOG_DIR/pmcd/core*
    return 1
}

# Set both timeouts involved in the test.
#   $1  value stored into the pmcd.control.timeout metric via pmstore
#   $2  value exported to client programs as PMCD_REQUEST_TIMEOUT
# Reports the two values (app first, then pmcd) on stdout, followed by
# whatever pmstore prints.  Leaves pmcd_timeout and app_timeout set.
_set_timeouts()
{
    pmcd_timeout=$1
    app_timeout=$2

    printf 'app timeout = %s\npmcd timeout = %s\n' "$app_timeout" "$pmcd_timeout"

    # export first: pmstore is itself a client and runs with this setting
    PMCD_REQUEST_TIMEOUT=$app_timeout
    export PMCD_REQUEST_TIMEOUT
    pmstore pmcd.control.timeout $pmcd_timeout
}

# Write all arguments, joined by blanks, as one line to stdout and append
# the same line to the $seq_full log.
_echo()
{
    echo "$*" | tee -a $seq_full
}

# use a dummy pmns for pminfo: it holds a single metric, dummyproc, in the
# domain chosen below
pmns=$tmp.pmns
dummy_domain=160 # choose non-existent domain
cat >$pmns <<EOF
root {
    dummyproc   $dummy_domain:0:0
}
EOF

# pmcd will run a dumb_pmda, masquerading as the IRIX agent.  This will
# not respond to PDUs from PMCD, which should then time it out.
# The pmcd PMDA is required so that pmcd.control.timeout can be modified.
# We don't need any other PMDAs and in particular all pmlogger and pmie
# processes are stopped before pmcd is reconfigured.
# Save the current pmcd.conf so it can be restored afterwards.

# need PMDA executable someplace user $PCP_USER (pmcd) can read it
#
cp $here/src/dumb_pmda $tmp.dumb_pmda

_save_config $PCP_PMCDCONF_PATH

# Assemble the replacement pmcd.conf in $tmp.tmp: the dumb_pmda entry
# first, followed by the line(s) of the current pmcd.conf whose first
# field is "pmcd".
{
    cat <<End-Of-File
# Installed by PCP QA test $seq on $(date)
# (blame Jonathan if you find this!)
#
dummyproc	$dummy_domain	pipe	binary 	$tmp.dumb_pmda -d $dummy_domain $TAG
End-Of-File
    $PCP_AWK_PROG '$1 == "pmcd"' <$PCP_PMCDCONF_PATH
} >$tmp.tmp

# pmcd's log; copied to $seq_full whenever a check fails
pmcdlog=$PCP_PMCDLOG_PATH

# no pmcd clients may be left running while pmcd is reconfigured
if ! _service pmlogger stop >>$seq_full 2>&1; then _exit 1; fi
if ! _service pmie stop >>$seq_full 2>&1; then _exit 1; fi

# install the cut-down pmcd.conf that was assembled in $tmp.tmp
$sudo cp $tmp.tmp $PCP_PMCDCONF_PATH

# turn off
# PMCD_RESTART_AGENTS=1
# from sysconfig environment
# (presumably so that pmcd does not respawn the timed-out agent by itself)
#
# The sed script acts on the PMCD_RESTART_AGENTS line only: it changes
# "=1" to "=0" and inserts, in front of that line, a two-line comment
# recording which QA test made the change and when.
#
_save_config $PCP_SYSCONFIG_DIR/pmcd
$sudo sed -e '/^PMCD_RESTART_AGENTS/{
s/=1/=0/
i\
#\
# NOTE: changed by QA '"$seq on `date`"'
}' $PCP_SYSCONFIG_DIR/pmcd >$tmp.sysconfig
$sudo cp $tmp.sysconfig $PCP_SYSCONFIG_DIR/pmcd

# Note: need to use $PCP_RC_DIR/pmcd here, not _service pmcd so that
# systemd (if it is available) does not notice any pmcd exiting
#
$sudo $PCP_RC_DIR/pmcd restart >$tmp.start
# same dots filter as every other start/stop report in this test
_filter_pcp_start <$tmp.start | _filter_dots
_wait_for_pmcd || _exit 1
if ! _check_pmcd_procs 1
then
    _echo "Error: pmcd didn't start!"
    cat $pmcdlog >>$seq_full
    exit
fi

# ---------------------------------------------------------------------
# Private helpers for the checks below.  Every check follows the same
# pattern: snapshot ps, make sure pmcd itself is untouched, then look at
# what happened to the dumb_pmda agent.
# ---------------------------------------------------------------------

# Take a ps snapshot into $tmp.ps and extract the pids of interest.
#   $1  label ("old" or "new"), used only in the lines logged to $seq_full
# Sets new_dumb_pid and new_pmcd_pid from the second ps column; each is
# empty when no matching process is found.
_snapshot_pids()
{
    # a blank is appended to every ps line so that the awk patterns, which
    # end in a blank, also match a command name at the very end of a line
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | sed -e 's/$/ /' >$tmp.ps
    grep -E '[P]PID|[p]m|[d]umb' $tmp.ps >>$seq_full
    new_dumb_pid=$($PCP_AWK_PROG <$tmp.ps '/\.dumb_pmda /{print $2}')
    echo "${1}_dumb_pid=$new_dumb_pid" >>$seq_full
    new_pmcd_pid=$($PCP_AWK_PROG <$tmp.ps '/\/bin\/pmcd /{print $2}')
    echo "${1}_pmcd_pid=$new_pmcd_pid" >>$seq_full
}

# Report a fatal test error and stop.
#   $@  one message per output line (sent to stdout and $seq_full)
# Appends pmcd's log to $seq_full and exits; the exit trap does the rest,
# with status still at its failure default.
_abort_with_pmcdlog()
{
    while [ $# -gt 0 ]
    do
	_echo "$1"
	shift
    done
    cat $pmcdlog >>$seq_full
    exit
}

# Abort unless the pmcd seen by the latest _snapshot_pids is still the
# very same process that was recorded in old_pmcd_pid.
_check_pmcd_unchanged()
{
    [ -n "$new_pmcd_pid" ] || _abort_with_pmcdlog "Error: pmcd died!"
    [ "$new_pmcd_pid" = "$old_pmcd_pid" ] \
	|| _abort_with_pmcdlog "Error: pmcd restarted!"
}

# Baseline: remember the pids of pmcd and of the agent it has just started.
_snapshot_pids old
old_dumb_pid=$new_dumb_pid
old_pmcd_pid=$new_pmcd_pid
if [ -z "$old_dumb_pid" ]
then
    _abort_with_pmcdlog "Error: pmcd didn't create the dumb_pmda agent"
fi

# force to a known state
#
pmstore pmcd.control.timeout 5 >/dev/null

# for debugging
#
#debug# pmstore pmcd.control.tracenobuf 1 >/dev/null

#
# pmcd agent timeout < application time out
#
_echo ""
_echo "*** pmcd agent timeout < app timeout ***"
_echo "*** pmcd will killoff agent ***"
_echo ""
_set_timeouts 5 10

# Fetch from dummy domain.  This should cause the dumb_pmda agent to be
# timed-out by pmcd, resulting in its termination.
#
_echo ""
_echo "Expect dummyproc to fail (IPC protocol failure):"
pminfo -n $pmns -d dummyproc
_echo ""
# make sure pmcd has had a chance to wait() on the pmda process
#
sleep 2
pminfo -n $pmns -d dummyproc >/dev/null 2>&1

_snapshot_pids new
_check_pmcd_unchanged
if [ -n "$new_dumb_pid" ] && [ "$new_dumb_pid" = "$old_dumb_pid" ]
then
    _abort_with_pmcdlog "Error: pmcd didn't terminate the dumb_pmda agent"
else
    # NOTE(review): the wording ("dummy the") is left untouched because
    # this line is test output, presumably compared with a recorded
    # expected-output file -- change both together or not at all.
    _echo "pmcd terminated dummy the dumb_pmda agent as desired"
fi

# keep the logs as they stand after the first scenario
{
    echo
    cat $PCP_LOG_DIR/pmcd/pmcd.log
    echo
    cat $PCP_LOG_DIR/pmcd/dumb_pmda.log
} >>$seq_full

# reset (not restart) pmcd and restart any dead PMDAs
#
_echo ""
$sudo $signal -a -s HUP pmcd
sleep 1
_wait_for_pmcd || _exit 1
_check_pmcd_procs 1 \
    || _abort_with_pmcdlog "Error: pmcd was sent SIGHUP and died"

_snapshot_pids new
_check_pmcd_unchanged
if [ -z "$new_dumb_pid" ]
then
    _abort_with_pmcdlog "Error: pmcd SIGHUP didn't restart the dumb_pmda agent"
elif [ "$new_dumb_pid" = "$old_dumb_pid" ]
then
    _abort_with_pmcdlog "Error: dumb_pmda agent with same pid has arisen from the dead!"
fi
# a new agent is running: it becomes the reference for the next scenario
old_dumb_pid=$new_dumb_pid

#
# pmcd agent timeout > application time out
#
_echo ""
_echo "*** pmcd agent timeout > app timeout ***"
_echo "*** pmcd will not killoff agent until later ***"
_echo ""
_set_timeouts 10 5

# Fetch from dummy domain.  This time the application's timeout is the
# shorter one, so pminfo is expected to give up waiting for pmcd while
# the dumb_pmda agent is still running; pmcd should only time the agent
# out (and terminate it) later on.
#
_echo ""
_echo "Expect dummyproc to fail (timeout for pmcd failure):"
echo "Before pminfo: $(date)" >>$seq_full
pminfo -n $pmns -d dummyproc 2>&1 \
| _filter
echo "After pminfo: $(date)" >>$seq_full
_echo ""

_snapshot_pids new
_check_pmcd_unchanged
if [ -n "$new_dumb_pid" ]
then
    _echo "pmcd did NOT terminate the dumb_pmda agent as desired"

    if [ "$old_dumb_pid" != "$new_dumb_pid" ]
    then
	_abort_with_pmcdlog \
	    "Error: dumb_pmda proc has been recreated" \
	    "pid mismatch: $old_dumb_pid versus $new_dumb_pid"
    fi
else
    _abort_with_pmcdlog "Error: pmcd terminated the dumb_pmda agent"
fi

# pminfo above has already used up the application timeout; assuming the
# timeouts are in seconds, a further 8 should take us past pmcd's 10.
_echo ""
_echo "Now wait for pmcd to timeout..."
_echo ""
sleep 8

_snapshot_pids new
_check_pmcd_unchanged
if [ -z "$new_dumb_pid" ]
then
    _echo "pmcd terminated the dumb_pmda agent as desired"
elif [ "$new_dumb_pid" = "$old_dumb_pid" ]
then
    _abort_with_pmcdlog "Error: pmcd didn't terminate the dumb_pmda agent"
else
    _abort_with_pmcdlog \
	"Error: dumb_pmda proc has been recreated" \
	"pid mismatch: $old_dumb_pid versus $new_dumb_pid"
fi

_echo ""
# all checks passed: the exit trap will now report success
status=0
