rc

#!/bin/bash -- also uncomment in edgeTypes
#

# Emit the C++ function dependencyInsertToLexicalFunction(), ready to be
# pasted into a C++ source file.
#
# Reads:   etc/depInsert2rel.tab (tab-separated: label, relation, comment)
# Outputs: one `case DEP_<LABEL>: return FUNCTION_<RELATION>;` line per
#          distinct row, wrapped in the function/switch boilerplate.
function depInsert2relC() {
  cat <<'EOF'
natlog_relation dependencyInsertToLexicalFunction(const dep_label& dep,
                                                  const ::word& dependent) {
  switch (dep) {
EOF
  # Rewrite the labels (prepc_ -> prep_, drop `\\/`, "vs." -> "vs",
  # ':' -> '_'), drop duplicate rows, upper-case the first two columns and
  # finally format every row as a `case` line.
  sed -e 's/prepc_/prep_/g' -e 's/\\\\\///g' -e 's/vs\./vs/g' -e 's/:/_/g' \
      etc/depInsert2rel.tab \
    | sort \
    | uniq \
    | awk -F'\t' '{ print toupper($1) "\t" toupper($2) "\t" $3 }' \
    | sed -r -e 's/^([^\t]+)\t([^\t]+)\t(.*)$/    case DEP_\1: return FUNCTION_\2;  \/\/  \3/g'
  cat <<'EOF'
    default:
      fprintf(stderr, "No such dependency label: %u\n", dep);
      std::exit(1);
      return 255;
  }
}
EOF
}

# Print the insertion relations as Java `put(...)` statements, bracketed by
# ">>>" / "<<<" marker lines, ready to be pasted into Java.
#
# Reads:   etc/depInsert2rel.tab (tab-separated: label, relation, comment)
# Outputs: one `put("<label>", NaturalLogicRelation.<RELATION>);` line per
#          distinct row; the label is emitted exactly as it is in the file.
function depInsert2relJava() {
  printf '%s\n' ">>> Java (insertions)"
  sort < etc/depInsert2rel.tab \
    | uniq \
    | awk -F'\t' '{ print $1 "\t" toupper($2) "\t" $3 }' \
    | sed -r -e 's/^([^\t]+)\t([^\t]+)\t(.*)$/    put("\1", NaturalLogicRelation.\2);  \/\/ \3/g'
  printf '%s\n' "<<< Java (insertions)"
}

# Print out the headers for the different dependency edge types.
#
# Reads:   etc/depInsert2rel.tab (tab-separated; column 1 is the label)
# Outputs: the dep_label typedef, one `#define DEP_<LABEL>  <n>` per distinct
#          row (numbered from 0 in sorted order) and a final
#          `#define NUM_DEPENDENCY_LABELS <count>`.
#
# The count is produced by the same awk pass that numbers the labels, so it
# always equals the number of DEP_* lines emitted.  (It used to come from a
# second copy of the pipeline with slightly different sed rules, which could
# disagree with the defines.)
function dep2header() {
  echo "typedef uint8_t dep_label;"
  echo ""
  sed -e 's/prepc_/prep_/g' -e 's/\\\\\///g' -e 's/vs\./vs/g' -e 's/:/_/g' \
      etc/depInsert2rel.tab \
    | sort \
    | uniq \
    | awk -F'\t' '
        { printf("#define DEP_%s  %d\n", toupper($1), NR - 1) }
        END { printf("#define NUM_DEPENDENCY_LABELS %d\n", NR) }
      '
}

# Emit the C++ helpers that translate between dependency strings and
# dependency indices: the dependency2index map with populateDependencyMap(),
# indexDependency() (string -> index) and dependencyGloss() (index -> string).
#
# Reads:   etc/depInsert2rel.tab (tab-separated; column 1 is the label)
# Outputs: C++ source on stdout.  The string side keeps the lower-cased label
#          as found in the file; the DEP_* side is upper-cased and then
#          cleaned up (AND\\/OR -> ANDOR, "VS." -> "VS", "X:" -> "X_").
function dep2string() {
  cat <<'EOF'
btree_map<string,uint8_t> dependency2index;

void populateDependencyMap() {
EOF

  # One map insertion per distinct row.
  sed -e 's/prepc_/prep_/g' etc/depInsert2rel.tab \
    | sort \
    | uniq \
    | awk -F'\t' '{ print tolower($1) "\t" toupper($1)}' \
    | sed -r -e 's/^([^\t]+)\t([^\t]+)$/  dependency2index["\1"] = DEP_\2;/g' \
    | sed -r -e 's/AND\\\\\/OR/ANDOR/g' -e 's/VS\./VS/g' -e 's/([A-Z]):/\1_/g'

  cat <<'EOF'
}

uint8_t indexDependency(const string& dependencyAsString) {
  if (dependency2index.size() == 0) {
    populateDependencyMap();
  }
  string lower = dependencyAsString;
  std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
  auto iter = dependency2index.find(lower);
  if (iter != dependency2index.end()) {
    return iter->second;
  }
  return NUM_DEPENDENCY_LABELS;
}

string dependencyGloss(const uint8_t& indexed) {
EOF

  # One `if` per distinct row for the reverse direction.
  sed -e 's/prepc_/prep_/g' etc/depInsert2rel.tab \
    | sort \
    | uniq \
    | awk -F'\t' '{ print tolower($1) "\t" toupper($1)}' \
    | sed -r -e 's/^([^\t]+)\t([^\t]+)$/  if (indexed == DEP_\2) { return "\1"; }/g' \
    | sed -r -e 's/AND\\\\\/OR/ANDOR/g' -e 's/VS\./VS/g' -e 's/([A-Z]):/\1_/g'

  cat <<'EOF'
  return "???";
}
EOF
}

# Emit the C++ helpers that translate an edge-type name into its index: the
# edgeType2Index map with populateEdgeTypeMap(), and indexEdgeType().
#
# Reads:   etc/edgeTypes.tab (tab-separated: index, name)
# Outputs: C++ source on stdout; names are lower-cased for the map keys.
#
# The generated lazy-initialisation guard tests edgeType2Index itself.  It
# previously tested dependency2index, so the edge-type map was never filled
# once the dependency map had been populated.
function edgeType2String() {
  cat <<'EOF'
btree_map<string,uint8_t> edgeType2Index;

void populateEdgeTypeMap() {
EOF

  sort etc/edgeTypes.tab \
    | uniq \
    | awk -F'\t' '{ print "  edgeType2Index[\"" tolower($2) "\"] = " $1 ";" }'

  cat <<'EOF'
}

uint8_t indexEdgeType(const string& edgeTypeAsString) {
  if (edgeType2Index.size() == 0) {
    populateEdgeTypeMap();
  }
  string lower = edgeTypeAsString;
  std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
  auto iter = edgeType2Index.find(lower);
  if (iter != edgeType2Index.end()) {
    return iter->second;
  }
  return NUM_MUTATION_TYPES;
}
EOF
}

# Print out the edge relation indices in a way that can be copied
# into c++ / java.
#
# Reads:   etc/edgeTypes.tab (tab-separated: index, name)
# Outputs: `#define <NAME>     <INDEX>` per row, followed by
#          `#define NUM_MUTATION_TYPES <rows>`.
#
# The row count comes from the same awk pass that prints the defines, so it
# stays correct even when the file's last line has no trailing newline
# (`wc -l` counts newline characters and would report one row too few).
function edgeTypes() {
  echo '// Graph Edges'
  awk -F'\t' '
    { print "#define " toupper($2) "     " toupper($1) }
    END { print "#define NUM_MUTATION_TYPES " NR }
  ' < etc/edgeTypes.tab
}

# Print out the dummy vocab entries to be put into the mock graph
# definitions in Types.h
#
# Reads:   etc/vocab.tab.gz (gzipped; a row matches a word when it ends in
#          "<TAB><word>", and its first whitespace-separated field is used
#          as the index)
# Outputs: for every mock word, `#define <WORD>  getTaggedWord(<idx>, 0, 0)`
#          and `#define <WORD>_STR  "<idx>"` ("OF" is renamed to "WOF"),
#          then `#define HIGHEST_MOCK_WORD_INDEX <max idx>`.
# Returns: non-zero, printing nothing, if the vocabulary cannot be read.
#
# The vocabulary is decompressed once and reused for every lookup, and all
# working variables are local so that sourcing this file into an interactive
# shell does not clobber a caller's `word`.
function dummyVocab() {
  local tab=$'\t'
  local vocab word macro idx highest

  vocab=$(zcat etc/vocab.tab.gz) || return

  for word in "of" "all" "lemur" "animal" "potto" "cat" "furry" "fuzzy" "have" "tail" "some" "dog" "chase" "no" "plasma" "conduct" "conductor" "electricity" "be" "nail" ; do
    macro=$(echo "$word" | tr '[:lower:]' '[:upper:]' | sed -e 's/OF/WOF/g')
    idx=$(grep "${tab}${word}\$" <<<"$vocab" | awk '{ print $1 }')
    echo "#define ${macro}  getTaggedWord(${idx}, 0, 0)"
    echo "#define ${macro}_STR  \"${idx}\""
  done

  # NOTE(review): this list is shorter than the one above (e.g. no "of",
  # "fuzzy", "plasma"); kept as-is -- confirm whether that is intentional.
  highest=$(
    for word in "all" "lemur" "animal" "potto" "cat" "furry" "have" "tail" "some" "dog" "chase" "no"; do
      grep "${tab}${word}\$" <<<"$vocab" | awk '{ print $1 }'
    done | sort -n -r | head -n 1
  )
  echo "#define HIGHEST_MOCK_WORD_INDEX ${highest}"
}

# Translate a monotonicity name into the pair of C++ assignments that
# records it.
#
# Arguments:
#   $1 - monotonicity name ("monotone", "additive", "multiplicative",
#        "additive-multiplicative", "antitone", "anti-additive",
#        "anti-multiplicative", "anti-additive-multiplicative",
#        "nonmonotone")
#   $2 - name of the monotonicity out-pointer (default: "mono")
#   $3 - name of the quantifier-type out-pointer (default: "type")
# Outputs: `*<$2> = MONOTONE_<dir>; *<$3> = QUANTIFIER_TYPE_<kind>; ` (note
#          the trailing blank).  An unrecognised name yields the FLAT/NONE
#          pair when $3 is "type" (or omitted), and the text
#          `UNKNOWN MONOTONICITY: <name>` otherwise.
#
# All working variables are local: this file is sourced, so assigning plain
# `mono` / `typ` would overwrite same-named variables in the user's shell.
function _stringToMonotonicity() {
  local mono=${2:-mono}
  local typ=${3:-type}
  local direction kind

  case "$1" in
    monotone)                     direction=UP;   kind=NONE ;;
    additive)                     direction=UP;   kind=ADDITIVE ;;
    multiplicative)               direction=UP;   kind=MULTIPLICATIVE ;;
    additive-multiplicative)      direction=UP;   kind=BOTH ;;
    antitone)                     direction=DOWN; kind=NONE ;;
    anti-additive)                direction=DOWN; kind=ADDITIVE ;;
    anti-multiplicative)          direction=DOWN; kind=MULTIPLICATIVE ;;
    anti-additive-multiplicative) direction=DOWN; kind=BOTH ;;
    nonmonotone)                  direction=FLAT; kind=NONE ;;
    *)
      if [ "$typ" = "type" ]; then  # a dirty dirty hack
        direction=FLAT; kind=NONE
      else
        echo "UNKNOWN MONOTONICITY: $1"
        return
      fi
      ;;
  esac

  echo "*$mono = MONOTONE_${direction}; *$typ = QUANTIFIER_TYPE_${kind}; "
}

# Create the mapping from quantifier index to quantifier characterization.
#
# Emits the C++ function characterizeQuantifier(), with one `case` per usable
# row of etc/operators.tab.  Example use of the generated function:
#   quantifier_type subjType, objType;
#   monotonicity subjMono, objMono;
#   characterizeQuantifier(edge.source, &subjType, &objType, &subjMono, &objMono);
#
# Reads:   etc/operators.tab (tab-separated; column 3 is the operator word,
#          columns 5 and 6 its subject / object monotonicity).  Blank lines,
#          lines starting with '#', and lines containing "__implicit" or
#          "UNARY_" are skipped.
# Calls:   index (word -> vocabulary index) and _stringToMonotonicity.
# Outputs: C++ source on stdout.
#
# The generated fragments start with '*' (e.g. `*subjMono = ...`), so they
# are always expanded inside quotes; unquoted, the shell would try to
# glob-match them against files in the current directory.
function characterizeQuantifiers() {
  local word subj obj word_index subj_code obj_code

  echo "void characterizeQuantifier(word w, quantifier_type* subjType, quantifier_type* objType, monotonicity* subjMono, monotonicity* objMono) {"
  echo "  switch(w) {"
  grep -E -v '^$|^#|__implicit|UNARY_' etc/operators.tab \
    | awk -F'\t' '{ print $3 ":" $5 ":" $6 }' \
    | while IFS=: read -r word subj obj _; do
        word_index=$(index "$word")
        subj_code=$(_stringToMonotonicity "$subj" subjMono subjType)
        obj_code=$(_stringToMonotonicity "$obj" objMono objType)
        printf '    // %s\n' "$word"
        # The helper's output ends in a blank; drop it so the pieces are
        # separated by exactly one space.
        printf '    case %s: %s %s return;\n' \
          "$word_index" "${subj_code% }" "${obj_code% }"
      done
  # Disabled alternative for the default branch, kept for reference:
  #   fprintf(stderr, "Could not find operator: %u\\n", w);
  #   exit(1);
  cat <<'EOF'
    default:
      *subjMono = MONOTONE_UP;
      *subjType = QUANTIFIER_TYPE_BOTH;
      *objMono = MONOTONE_UP;
      *objType = QUANTIFIER_TYPE_BOTH;
      break;
  }
}
EOF
}

# A function to sanity check the produced graph.
#
# Reads:   etc/graph.tab.gz (gzipped; fields 1 and 3 are the vertex indices,
#          tab-separated field 6 is the edge cost)
# Outputs: progress messages; the run succeeded only if the final line is
#          'All graph tests passed!'.
# Returns: 0 when every check passes, 1 at the first failing check (nothing
#          further is printed after a failure).
#
# A check passes only if the graph could be decompressed AND no row was
# rejected.  The vertex pattern is anchored so a row such as "a1 ... 2b"
# cannot slip through on its embedded digits.
function validateGraph() {
  local -a stage

  echo "Validating graph..."
  echo "(this fails unless you see 'All graph tests passed!')"

  echo "Checking non-numeric vertices..."
  zcat etc/graph.tab.gz \
    | awk '{ print $1 " " $3 }' \
    | grep -E -v '^[0-9]+ [0-9]+$' > /dev/null
  stage=("${PIPESTATUS[@]}")
  # `grep -v` exits 1 only when it found no offending row.
  if [ "${stage[0]}" -ne 0 ] || [ "${stage[2]}" -ne 1 ]; then
    return 1
  fi
  echo "Graph has no non-numeric vertices"

  echo "Checking invalid costs..."
  zcat etc/graph.tab.gz \
    | awk -F'\t' '{ print $6 }' \
    | grep -E -v '^[0-9.]+$' > /dev/null
  stage=("${PIPESTATUS[@]}")
  if [ "${stage[0]}" -ne 0 ] || [ "${stage[2]}" -ne 1 ]; then
    return 1
  fi
  echo "Graph has no invalid costs"

  echo "All graph tests passed!"
}

# A utility to get the index of a word.
#
# Arguments: $1 - the word gloss to look up
# Reads:     etc/vocab.tab.gz (gzipped)
# Outputs:   the first whitespace-separated field of every row that ends in
#            "<TAB><word>"; nothing if there is no such row.
#
# The word is compared literally.  It is handed to awk through the
# environment rather than spliced into a regular expression, so glosses
# containing characters such as '.' or '*' only match themselves.
function index() {
  zcat etc/vocab.tab.gz \
    | NATLI_INDEX_QUERY="$1" awk '
        BEGIN { suffix = "\t" ENVIRON["NATLI_INDEX_QUERY"] }
        {
          start = length($0) - length(suffix) + 1
          if (start >= 1 && substr($0, start) == suffix) {
            print $1
          }
        }
      '
}

# A utility to return the gloss of an index.
#
# Arguments: $1 - the word index to look up
# Reads:     etc/vocab.tab.gz (gzipped; rows are matched on "<index><TAB>"
#            at the start of the line)
# Outputs:   the second tab-separated field of every matching row.
#
# Fields are split on tabs, the same separator the row match uses, so a gloss
# that contains spaces is printed in full instead of being cut at its first
# word.
function gloss() {
  local tab=$'\t'
  zcat etc/vocab.tab.gz \
    | grep "^${1}${tab}" \
    | awk -F'\t' '{ print $2 }'
}

# A utility to find all edges between two words.
#
# Arguments:
#   $1 - source word gloss
#   $2 - sink word gloss (optional; when omitted, every edge leaving the
#        source is listed and the sink gloss is shown as "???")
# Reads:   etc/graph.tab.gz (gzipped; fields are source, source sense, sink,
#          sink sense, edge type, cost) and etc/edgeTypes.tab (tab-separated:
#          index, name), used to replace the edge-type index by its name.
# Calls:   index (word -> vocabulary index).
# Outputs: one line per edge:
#            <source> (<idx>:<sense>)  <type>  <sink> (<idx>:<sense>)   @cost=<cost>
# Returns: 1, with a message on stderr, if either word is not in the
#          vocabulary (an empty index would otherwise match every row).
#
# Rows are selected on "<source index><TAB>", so looking up index 12 no
# longer also returns the edges of 120, 123, ...
function edge() {
  local tab=$'\t'
  local source sink

  source=$(index "$1")
  if [ -z "$source" ]; then
    echo "edge: unknown word: $1" >&2
    return 1
  fi

  if [ "$2" = "" ]; then
    zcat etc/graph.tab.gz \
      | grep -E "^${source}${tab}" \
      | awk '{ print $2 "\t" $4 "\t" $5 "\t" $6 "\t" $3 }' \
      | awk -F'\t' '
          FNR == NR {
              assoc[ $1 ] = $2;
              next;
          }
          FNR < NR {
              if ( $3 in assoc ) {
                  $3 = assoc[ $3 ]
              }
              print $1 " " $2 " " $3 " " $4 " " $5
          }
        ' etc/edgeTypes.tab - \
      | awk '{
          print $1 " " $2 " " $3 " " $4 " " $5
        }' \
      | sed -r -e "s/(.*) (.*) (.*) (.*) (.*)/${1} (${source}:\1)  \3  ??? (\5:\2)   @cost=\4/g"
  else
    sink=$(index "$2")
    if [ -z "$sink" ]; then
      echo "edge: unknown word: $2" >&2
      return 1
    fi
    zcat etc/graph.tab.gz \
      | grep -E "^${source}${tab}[^${tab}]+${tab}${sink}${tab}" \
      | awk '{ print $2 "\t" $4 "\t" $5 "\t" $6 }' \
      | awk -F'\t' '
          FNR == NR {
              assoc[ $1 ] = $2;
              next;
          }
          FNR < NR {
              if ( $3 in assoc ) {
                  $3 = assoc[ $3 ]
              }
              print
          }
        ' etc/edgeTypes.tab - \
      | sed -r -e "s/(.*) (.*) (.*) (.*)/${1} (${source}:\1)  \3  ${2} (${sink}:\2)   @cost=\4/g"
  fi
}

# A utility to return the definitions of a word.
#
# Arguments:
#   $1 - word form
#   $2 - sense number (optional)
# Reads:   etc/sense.tab.gz (gzipped; rows start with "<index><TAB>"; the
#          fields are used as index, sense number, definition)
# Calls:   index (word -> vocabulary index).
# Outputs: without $2, every row for the word, with the leading index
#          replaced by the word form; with $2, "<word>:<TAB><definition>"
#          for the rows containing "<TAB><$2><TAB>".
#
# The word form is passed on quoted so multi-word forms are looked up as one
# word, and the looked-up index is kept in a local variable rather than in a
# global named `index`.
function sense() {
  local tab=$'\t'
  local word_index

  word_index=$(index "$1")
  if [ "$2" = "" ]; then
    zcat etc/sense.tab.gz \
      | grep "^${word_index}${tab}" \
      | sed -e "s/^${word_index}/${1}/g"
  else
    zcat etc/sense.tab.gz \
      | grep "^${word_index}${tab}" \
      | grep "${tab}${2}${tab}" \
      | awk -F'\t' '{ print $1 "\t" $3 }' \
      | sed -e "s/^${word_index}/${1}:/g"
  fi
}

# Alias for `sense`: hands every argument over unchanged and returns
# whatever `sense` returns.
function define() { sense "$@"; }

# Merge the secondary classifier-search cache into the primary one.
#
# Reads:   tmp/naturalli_classifier_search.cache and
#          tmp/naturalli_classifier_search_2.cache
# Writes:  tmp/naturalli_classifier_search.cache, replaced by the sorted,
#          de-duplicated union of both files (staged in tmp/foo) and then
#          stripped of its write permission.
# Returns: the status of the first step that fails, otherwise that of the
#          final chmod.
function updateCache() {
  local -r primary=tmp/naturalli_classifier_search.cache
  local -r secondary=tmp/naturalli_classifier_search_2.cache
  local -r staging=tmp/foo

  cat "$primary" "$secondary" | sort | uniq > "$staging" || return
  # The previous cache is read-only, so remove it explicitly before the move.
  rm -f "$primary" || return
  mv "$staging" "$primary" || return
  chmod -w "$primary"
}

# Print a short summary of the lookup utilities this file installs.
# (Once sourced, this function takes the place of the shell's builtin `help`.)
function help() {
  cat <<'EOF'
Utilities installed:
  * index <word_form> : Print the index of the given word gloss.
  * gloss <word_index> : Print the gloss of a given word index.
  * edge <source> <sink> : Print the edge between two word glosses.
  * sense <word_form> [<sense_number>] : Print the definition of a word.
                                         If no sense number, print all senses.
  * define <word_form> [<sense_number>] : ^As above.
EOF
}