scripts/clam/cpp/buildor_gen_deps.sh

   1 #!/bin/bash
   2 ###############################################################################
   3 #                                                                             #
   4 #  Name   : buildor_gen_deps                                                  #
   5 #  Author : Chris Koeritz                                                     #
   6 #  Rights : Copyright (C) 2008-$now by Author                                 #
   7 #                                                                             #
   8 ###############################################################################
   9 #  This script is free software; you can redistribute it and/or modify it     #
  10 #  under the terms of the GNU General Public License as published by the Free #
  11 #  Software Foundation; either version 2 of the License or (at your option)   #
  12 #  any later version.  See "http://www.fsf.org/copyleft/gpl.html" for a copy  #
  13 #  of the License online.  Please send any updates to "fred@gruntose.com".    #
  14 ###############################################################################
  15
  16 if [ ! -z "$CLEAN" ]; then
  17   echo "in cleaning mode, will not build dependencies."
  18   exit 0
  19 fi
  20
  21 # this script finds all of the headers used by a cpp file and outputs a
  22 # list of other cpp files that are probably needed for building it.
  23
  # shared state for the whole script.  these live at global scope so the
  # functions below can accumulate information without juggling positional
  # parameters everywhere.

  # dependencies collected so far.
  declare -a dependency_accumulator=()

  # files known to be bad, since we cannot find them.
  declare -a bad_files=()

  # files already looked at that are neither bad nor listed as
  # dependencies; remembering them keeps us from looking at them again.
  declare -a boring_files=()

  # the directory the tool was pointed at.  files from it are not allowed to
  # participate in the scavenging, since that leads to bad dependencies.
  prohibited_directory=""

# an empty field separator list switches off word splitting, so tabs and
# spaces inside lines and file names are not eaten.
IFS=""
export IFS

# a variable holding a literal tab, to avoid weirdness with grep.
TAB_CHAR=$'\t'
  48
  49 # reports if a certain dependency has been seen already.
  50 # a zero success value is returned if the file has been seen before,
  51 # and a non-zero failure value for when the file is totally new.
  52 function seen_already {
  53   if existing_dep "$1"; then return 0; fi  # added it to list already.
  54   if bad_file "$1"; then return 0; fi  # known to suck.
  55   if boring_file "$1"; then return 0; fi  # we already saw it.
  56   return 1  # we had not seen this one, so we return an error.
  57 }
  58
  59 # adds a new dependency at the end of the list.
  60 function add_new_dep {
  61   # make sure we haven't already processed this.
  62   local dep="$1"
  63   if seen_already "$dep"; then
  64 #echo bailing since seen: $dep
  65  return 1; fi
  66 #echo had not seen before: $dep
  67
  68 #  if existing_dep $dep; then return 1; fi  # added it to list already.
  69 #  if bad_file $dep; then return 1; fi  # known to suck.
  70 #  if boring_file $dep; then return 1; fi  # we already saw it.
  71 ##echo new dep: $dep
  72
  73   dependency_accumulator+=($dep)
  74   return 0
  75 }
  76
  77 # checks the existing dependencies to see if the first parameter is already
  78 # listed.  if this is the case, zero is returned (meaning success).  if
  79 # the dependency is missing, then -1 is return to indicate an error.
  80 function existing_dep {
  81 #hmmm: below is not very efficient!
  82   for currite in ${dependency_accumulator[*]}; do
  83     if [ "$currite" == "$1" ]; then return 0; fi
  84   done
  85   return 1
  86 }
  87
  88 # reports whether a file name has already been processed.
  89 function boring_file {
  90
  91 #hmmm: below might not be very efficient!
  92   for currite in ${boring_files[*]}; do
  93     if [ "$currite" == "$1" ]; then return 0; fi
  94   done
  95   return 1
  96 }
  97
  98 # reports whether a file name has already been found to be missing.
  99 function bad_file {
 100
 101 #hmmm: below also is not very efficient!
 102   for currite in ${bad_files[*]}; do
 103     if [ "$currite" == "$1" ]; then return 0; fi
 104   done
 105   return 1
 106 }
 107
 108 # checks whether an item is already contained in a list.  the first parameter
 109 # is taken as the item that one wants to add.  the second through n-th
 110 # parameters are taken as the candidate list.  if the item is present, then
 111 # zero is returned to indicate success.  otherwise a non-zero return value
 112 # indicates that the item was not yet present.
 113 function already_listed {
 114   to_find=$1
 115   shift
 116   while (( $# > 0 )); do
 117     # return that we found it if the current item matches.
 118     if [ "$to_find" == "$1" ]; then return 0; fi
 119     shift  # toss next one out.
 120   done
 121   # failed to match it.
 122   return 1
 123 }
 124
 125 # finds the index of a particular element in the remainder of a list.
 126 # the variable __finders_indy will be set to -1 for no match, or it will be the
 127 # index of the element if the item was found.
 128 __finders_indy=-1
 129 function find_in_array {
 130   local to_find=$1
 131 #echo find_in_array needs: $to_find
 132   shift
 133 #echo restargs finder: $*
 134   local indy=0
 135   while (( $# > 0 )); do
 136     # return that we found it if the current item matches.
 137 #echo "find_in_array posn $indy has $1"
 138     if [ "$to_find" == "$1" ]; then
 139 #echo "FOUND $to_find at $indy"
 140        __finders_indy=$indy
 141        return 0
 142     fi
 143     shift  # toss next one out.
 144     indy=$(expr $indy + 1)
 145 #echo "find_in_array indy now $indy "
 146   done
 147   _finders_indy=-1
 148   # failed to match it.
 149   return 1
 150 }
 151
 152 ############################################################################
 153 #
 154 # this variable gets stored into when resolve_filename runs.
 155 declare -a resolve_target_array=()
 156 #
 157 # this variable is used internally by resolve_filename.  it should not need
 158 # to be reset between runs on different files because the source hierarchy
 159 # is not supposed to be getting files deleted or added while the deps are
 160 # being geneated.
 161 declare -a resolve_matches_src=()
 162 declare -a resolve_matches_dest=()
 163 #
 164 # tries to find a filename in the library hierarchy.
 165 function resolve_filename {
 166   local code_file=$1
 167 #echo resolving: $code_file
 168   if [ -f "$code_file" ]; then
 169     # that was pretty easy.
 170     resolve_target_array=($code_file)
 171     return 0
 172   fi
 173 #echo "MUST seek: $code_file"
 174
 175   local dir=$(dirname "$code_file")
 176   local base=$(basename "$code_file")
 177   local src_key="$dir/$base"
 178 #echo "src_key: $src_key"
 179
 180   # see if we can find that element in the previously resolved items.
 181   if find_in_array "$src_key" ${resolve_matches_src[*]}; then
 182     local found_indy=$__finders_indy
 183     resolve_target_array=(${resolve_matches_dest[$found_indy]})
 184 #echo "FOUND \"$src_key\" AT ${resolve_matches_dest[$found_indy]}"
 185     return 0
 186   fi
 187
 188   # reset our global list.
 189   resolve_target_array=()
 190 #echo "HAVING TO FIND: $dir and $base"
 191   if [ -z "$dir" ]; then
 192     resolve_target_array=($(find "$BUILD_TOP" -iname "$base"))
 193   else
 194     resolve_target_array=($(find "$BUILD_TOP" -iname "$base" | grep "$dir.$base"))
 195   fi
 196 #echo resolved to: ${resolve_target_array[*]}
 197 #echo size of resolve array=${#resolve_target_array[*]}
 198   if [ ${#resolve_target_array[*]} -eq 1 ]; then
 199 #echo ADDING a match: $src_key ${resolve_target_array[0]}
 200     # for unique matches, we will store the correspondence so we can look
 201     # it up very quickly later.
 202     resolve_matches_src+=($src_key)
 203     resolve_matches_dest+=(${resolve_target_array[0]})
 204   fi
 205 }
 206 #
 207 ############################################################################
 208
# main function that recurses on files and their dependencies.
# this takes a list of file names to examine.  each one will have its
# dependencies crawled.  we attempt to recurse on as few items as possible
# by making sure we haven't already seen files or decided they're bad.
#
# each call examines only the first name in the list: its #include lines
# are extracted, every include that can be located (plus a matching .cpp for
# a .h, when one is found) is added via add_new_dep, and newly added files
# are appended to the work list.  the function then calls itself on the
# remaining list until it is empty.  results accumulate in the globals
# dependency_accumulator, boring_files and bad_files.  always returns zero.
#
# NOTE(review): has_guard, found_it, spoon and line_found are assigned
# without "local", so they end up as globals.
function recurse_on_deps {
  # snag arguments into a list of dependencies to crawl.
  local -a active_deps=($*)

  # pull off the first dependency so we can get all of its includes.
  local first_element="${active_deps[0]}"
  active_deps=(${active_deps[*]:1})

  # make the best guess we can at the real path.
  resolve_filename $first_element
  local to_examine="${resolve_target_array[0]}"

  # we didn't already have a failure (due to it being a bad file already
  # or other problems).  and once we execute the below code to grab the
  # file's dependencies, it really is boring and we never want to see it
  # again.
  boring_files+=($to_examine)

# report the file being examined as "<parent directory>/<file name>".
local dirtmp=$(dirname "$to_examine")
local basetmp=$(basename "$to_examine")
echo "dependent on: $(basename "$dirtmp")/$basetmp"
#hmmm: gather the dependencies listed in debugging line above into a
#      list that will be printed out at the end.

  ##########################################################################

  # pick two unique temporary names.  mktemp creates each file and the
  # following rm deletes it again, so only the name is kept; the files are
  # recreated by the redirections further down.
  # NOTE(review): $base is not assigned anywhere in this function, so it
  # presumably expands to nothing here unless some other scope provides it
  # -- confirm whether "$basetmp" was intended.
  local current_includes="$(mktemp $TEMPORARIES_PILE/zz_buildor_deps4-$base.XXXXXX)"
  rm -f "$current_includes"

  local partial_file="$(mktemp $TEMPORARIES_PILE/zz_buildor_deps5-$base.XXXXXX)"
  rm -f "$partial_file"

  # find all the includes in this file and save to the temp file.
  # first copy the file up to (but not including) the first line that
  # contains our static application guard, so that includes generated by an
  # earlier run are not treated as dependencies.
  while read -r spoon; do
    has_guard="$(echo "$spoon" \
        | sed -n -e 's/#ifdef __BUILD_STATIC_APPLICATION__/yep/p')"
    if [ ! -z "$has_guard" ]; then
      # quit reading when we've seen the start of one of our guards.
      break
    fi
    # if we are okay with the line, save it to the temp file.
    echo "$spoon"
  done <"$to_examine" >"$partial_file"

  # keep only lines that start with optional spaces or tabs and then #include.
  grep "^[ $TAB_CHAR]*#include.*" <"$partial_file" >>"$current_includes"

  rm "$partial_file"

#echo "grabbing includes from: $to_examine"

#hmmm: could separate the find deps on this file stuff below.

  # the directory of the examined file; used for includes without a path.
  local fp_dir=$(dirname "$to_examine")
#echo fp_dir is: $fp_dir

  # iterate across the dependencies we saw and add them to our list if
  # we haven't already.
  while read -r line_found; do
    # first attempt: pull the name out of an angle bracket style include.
    # when the line is not of that form, sed leaves it unchanged.
    local chew_toy=$(echo $line_found | sed -e 's/^[ \t]*#include *<\(.*\)>.*$/\1/')
    # we want to add the file to the active list before we forgot about it.
#echo A: chew_toy=$chew_toy

    # check whether the dependency looks like one of our style of includes.
    # if it doesn't have a slash in it, then we need to give it the same
    # directory as the file we're working on.
    # NOTE(review): this is computed before the fallback extraction below,
    # so for lines the first pattern did not match, chew_toy is still the
    # whole include line and a slash anywhere on it counts -- confirm that
    # is intended.
    local slash_present=$(echo $chew_toy | sed -n -e 's/.*[\\\/].*/yep/p')

    # the replacement above to get rid of #include failed.  try something
    # simpler.
    if [ ! -z "$(echo $chew_toy | sed -n -e 's/#include/crud/p')" ]; then
      # try again with a simpler pattern.
      chew_toy=$(echo $line_found | sed -e 's/^[ \t]*#include *[">]\(.*\)[">].*$/\1/')
#echo B: chew_toy=$chew_toy

      # if it still has an #include or if it's not really a file, we can't
      # use it for anything.
      if [ ! -z "$(echo $chew_toy | sed -n -e 's/#include/crud/p')" ]; then
        echo "** bad include: $chew_toy"
        continue
      fi

      # we are pretty sure that this file has no path components in it.
      # we will add the surrounding directory if possible.
      if [ -z "$slash_present" ]; then
        if [ -z "$fp_dir" ]; then
          # well, now we have no recourse, since we don't know where to
          # say this file comes from.
          echo "** unknown directory: $chew_toy"
        else
          # cool, we can rely on the existing directory.
          chew_toy="$fp_dir/$chew_toy"
#echo patched dir: $chew_toy
        fi
      fi
    fi

    # skip names we already know cannot be found.
    if bad_file $chew_toy; then
#echo C: skipping because on bad list: $chew_toy
      continue
    fi

###  # if we've seen it before, we bail.
###  if seen_already "$to_examine"; then
###echo bailing since seen before: $to_examine
###return 0;
### fi

    # make sure we can see this file already, or we will need to seek it out.
    if [ ! -f "$chew_toy" ]; then
      # not an obvious filename yet.  try resolving it.
      resolve_filename $chew_toy
      declare -a found_odd=(${resolve_target_array[*]})
#echo found-list-is: ${found_odd[*]}
      local odd_len=${#found_odd[*]}
#echo odd len is $odd_len
      if [ $odd_len -eq 0 ]; then
        # whoops.  we couldn't find it.  probably a system header, so toss it.
#echo "** ignoring: $chew_toy"
        bad_files+=($chew_toy)
        chew_toy=""
      elif [ $odd_len -eq 1 ]; then
        # there's exactly one match, which is very good.
        chew_toy="${found_odd[0]}"
#echo C: chew_toy=$chew_toy
      else
        # this is really wrong.  there are multiple files with the same name?
        # that kind of things makes debugger tools angry or stupid.
        echo "** non-unique name: $chew_toy"
        bad_files+=($chew_toy)
        chew_toy=""
      fi
    fi

    # a non-empty name that still is not an existing file is given up on.
    if [ ! -z "$chew_toy" -a ! -f "$chew_toy" ]; then
      echo "** failed to compute a real path for: $chew_toy"
      bad_files+=($chew_toy)
      chew_toy=""
      continue
    fi

    # now if we got something out of our patterns, add it as a file to
    # investigate.
    if [ ! -z "$chew_toy" ]; then
      # add the dependency we found.
      if add_new_dep "$chew_toy"; then
        # if that worked, it's not existing or bad so we want to keep it.
        if ! already_listed "$chew_toy" ${active_deps[*]}; then
          # track the file for its own merits also (to squeeze more includes).
          active_deps+=($chew_toy)
        fi
      fi
    fi

    # now compute the path as if it was the implementation file (x.cpp)
    # instead of being a header.  does that file exist?  if so, we'd like
    # its dependencies also.  the pattern only rewrites names that end in
    # ".h" and have no other dot before that.
    local cpp_toy=$(echo $chew_toy | sed -e 's/^\([^\.]*\)\.h$/\1.cpp/')

    # there's no point in adding it if the name didn't change.
    if [ "$cpp_toy" != "$chew_toy" ]; then
      resolve_filename $cpp_toy
#hmmm: what if too many matches occur?
      # only the first resolved entry is considered.
      found_it="${resolve_target_array[0]}"

      # if the dependency actually exists, then we'll add it to our list.
      if [ ! -z "$found_it" ]; then
        if add_new_dep "$found_it"; then
          # that was a new dependency, so we'll continue examining it.
          if ! already_listed "$found_it" ${active_deps[*]}; then
            active_deps+=($found_it)
          fi
        fi
      fi
    fi
  done <"$current_includes"

  rm -f "$current_includes"

  # keep going on the list after our modifications.
  if [ ${#active_deps[*]} -ne 0 ]; then recurse_on_deps ${active_deps[*]}; fi
  return 0
}
 396
 397 # this takes the dependency list and adds it to our current file.
 398 function write_new_version {
 399   local code_file=$1
 400
 401   local opening_guard_line="\n#ifdef __BUILD_STATIC_APPLICATION__\n  // static dependencies found by buildor_gen_deps.sh:"
 402   local closing_guard_line="#endif // __BUILD_STATIC_APPLICATION__\n"
 403
 404 #echo "would write deps to: $code_file"
 405 #echo ${dependency_accumulator[*]}
 406
 407   local replacement_file="$(mktemp $TEMPORARIES_PILE/zz_buildor_deps3.XXXXXX)"
 408
 409   # blanks is a list of blank lines that we save up in between actual content.
 410   # if we don't hold onto them, we can have the effect of "walking" the static
 411   # section down the file as progressively more blanks get added.  we ensure
 412   # that only one is between the last code line and the guarded static chunk.
 413   declare -a blanks=()
 414   # read in our existing file.
 415   while read -r orig_line; do
 416 #echo "read: '$orig_line'"
 417     # if it's the beginning of our static app section, stop reading.
 418     if [ ! -z "$(echo $orig_line \
 419         | sed -n -e 's/#ifdef __BUILD_STATIC_APPLICATION__/yep/p')" ]; then
 420       break
 421     fi
 422     if [ -z "$orig_line" ]; then
 423       # add another blank line to our list and don't print any of them yet.
 424       blanks+=($'\n')
 425     else
 426       # this line is not a blank; send any pending blanks to the file first.
 427       if [ ${#blanks[*]} -ne 0 ]; then
 428         echo -n ${blanks[*]} >>"$replacement_file"
 429       fi
 430       echo "$orig_line" >>"$replacement_file"
 431       # reset our list of blank lines, since we just added them.
 432       blanks=()
 433     fi
 434   done <"$code_file"
 435
 436   echo -e "$opening_guard_line" >>"$replacement_file"
 437
 438   # now accumulate just the dependencies for a bit.
 439   local pending_deps="$(mktemp $TEMPORARIES_PILE/zz_buildor_deps2.XXXXXX)"
 440   rm -f "$pending_deps"
 441
 442   # iterate across all the dependencies we found.
 443   for line_please in ${dependency_accumulator[*]}; do
 444
 445     # throw out any items that are in the same directory we started in.
 446     if [ "$prohibited_directory" == "$(dirname $line_please)" ]; then
 447 #echo "skipping prohibited: $line_please"
 448       continue
 449     fi
 450
 451     # strip the line down to just the filename and single directory component.
 452     local chewed_line=$(echo $line_please | sed -e 's/.*[\\\/]\(.*\)[\\\/]\(.*\)$/\1\/\2/')
 453
 454     if [ ! -z "$(echo $chewed_line | sed -n -e 's/\.h$/yow/p')" ]; then
 455 #echo skipping header file: $chewed_line
 456       continue
 457     fi
 458
 459     local new_include="  #include <$chewed_line>"
 460     echo "$new_include" >>"$pending_deps"
 461 #echo adding "$new_include"
 462   done
 463
 464   sort "$pending_deps" >>"$replacement_file"
 465   rm -f "$pending_deps"
 466
 467   echo -e "$closing_guard_line" >>"$replacement_file"
 468
 469 #echo "about to move replacement, diffs:"
 470 #diff "$replacement_file" "$code_file"
 471 #echo "--------------"
 472 #echo full file:
 473 #cat "$replacement_file"
 474 #echo "--------------"
 475
 476   mv "$replacement_file" "$code_file"
 477 }
 478
 479 function find_dependencies {
 480   local code_file=$1
 481
 482   # initialize our globals.
 483   dependency_accumulator=()
 484   boring_files=()
 485
 486   # start recursing with the first dependency being the file itself.
 487   recurse_on_deps $code_file
 488
 489   # create the new version of the file.
 490   write_new_version "$code_file"
 491 }
 492
 493 # main script starts here.
 494
 495 for curr_parm in $*; do
 496
 497   echo "----------------------------------------------------------------------------"
 498   echo ""
 499
 500   # resets the bad list in between sessions.
 501   bad_files=()
 502 #echo bad_files initial: ${bad_files[*]}
 503
 504   if [ -f "$curr_parm" ]; then
 505     echo "scanning file: $curr_parm"
 506     # get absolute path of the containing directory.
 507     prohibited_directory="$(pwd "$curr_parm")"
 508     # fix our filename to be absolute.
 509     temp_absolute="$prohibited_directory/$(basename "$curr_parm")"
 510     curr_parm="$temp_absolute"
 511 #echo "curr_parm: $curr_parm"
 512     find_dependencies "$curr_parm"
 513   elif [ -d "$curr_parm" ]; then
 514     echo "scanning folder: $curr_parm"
 515     # get absolute path of the containing directory.
 516     prohibited_directory="$(pwd $curr_parm)"
 517     # set the directory to that absolute path.
 518     curr_parm="$prohibited_directory"
 519 #echo "curr_parm: $curr_parm"
 520     outfile="$(mktemp $TEMPORARIES_PILE/zz_buildor_deps1.XXXXXX)"
 521     find "$curr_parm" -iname "*.cpp" >"$outfile"
 522     while read -r line_found; do
 523       if [ $? != 0 ]; then break; fi
 524 #echo "looking at file: $line_found"
 525       find_dependencies "$line_found"
 526     done <"$outfile"
 527     rm -f "$outfile"
 528   else
 529     echo "parameter is not a file or directory: $curr_parm"
 530   fi
 531
 532   echo "ignored: " ${bad_files[*]}
 533
 534   echo ""
 535   echo ""
 536
 537 done
 538
 539