scripts/clam/cpp/buildor_gen_deps.sh

   1 #!/bin/bash
   2 ###############################################################################
   3 #                                                                             #
   4 #  Name   : buildor_gen_deps                                                  #
   5 #  Author : Chris Koeritz                                                     #
   6 #  Rights : Copyright (C) 2008-$now by Author                                 #
   7 #                                                                             #
   8 ###############################################################################
   9 #  This script is free software; you can redistribute it and/or modify it     #
  10 #  under the terms of the GNU General Public License as published by the Free #
  11 #  Software Foundation; either version 2 of the License or (at your option)   #
  12 #  any later version.  See "http://www.fsf.org/copyleft/gpl.html" for a copy  #
  13 #  of the License online.  Please send any updates to "fred@gruntose.com".    #
  14 ###############################################################################
  15
  16 # this script finds all of the headers used by a cpp file and outputs a
  17 # list of other cpp files that are probably needed for building it.
  18
  # shared state for the whole script.  these live at file scope so that the
  # functions below can accumulate results without shuffling lists around as
  # positional parameters.

  # every dependency collected so far for the file being processed.
  declare -a dependency_accumulator=()

  # names we failed to locate; remembered so that we never seek them twice.
  declare -a bad_files=()

  # files that have already been examined, whether or not they ended up
  # being listed as dependencies or as bad files.
  declare -a boring_files=()

  # the directory the tool was pointed at.  files living there may not be
  # scavenged as dependencies, since including them leads to bad dependencies.
  prohibited_directory=""

# an empty separator list switches off word splitting, so unquoted expansions
# and "read" leave the tabs and spaces inside a line alone.
IFS=""
export IFS

# a literal tab character held in a variable, which avoids weirdness when it
# has to be embedded in a grep pattern.
TAB_CHAR=$'\t'
  43
  44 # reports if a certain dependency has been seen already.
  45 # a zero success value is returned if the file has been seen before,
  46 # and a non-zero failure value for when the file is totally new.
  47 function seen_already {
  48   if existing_dep "$1"; then return 0; fi  # added it to list already.
  49   if bad_file "$1"; then return 0; fi  # known to suck.
  50   if boring_file "$1"; then return 0; fi  # we already saw it.
  51   return 1  # we had not seen this one, so we return an error.
  52 }
  53
  54 # adds a new dependency at the end of the list.
  55 function add_new_dep {
  56   # make sure we haven't already processed this.
  57   local dep="$1"
  58   if seen_already "$dep"; then
  59 #echo bailing since seen: $dep
  60  return 1; fi
  61 #echo had not seen before: $dep
  62
  63 #  if existing_dep $dep; then return 1; fi  # added it to list already.
  64 #  if bad_file $dep; then return 1; fi  # known to suck.
  65 #  if boring_file $dep; then return 1; fi  # we already saw it.
  66 ##echo new dep: $dep
  67
  68   dependency_accumulator+=($dep)
  69   return 0
  70 }
  71
  72 # checks the existing dependencies to see if the first parameter is already
  73 # listed.  if this is the case, zero is returned (meaning success).  if
  74 # the dependency is missing, then -1 is return to indicate an error.
  75 function existing_dep {
  76 #hmmm: below is not very efficient!
  77   for currite in ${dependency_accumulator[*]}; do
  78     if [ "$currite" == "$1" ]; then return 0; fi
  79   done
  80   return 1
  81 }
  82
  83 # reports whether a file name has already been processed.
  84 function boring_file {
  85
  86 #hmmm: below might not be very efficient!
  87   for currite in ${boring_files[*]}; do
  88     if [ "$currite" == "$1" ]; then return 0; fi
  89   done
  90   return 1
  91 }
  92
  93 # reports whether a file name has already been found to be missing.
  94 function bad_file {
  95
  96 #hmmm: below also is not very efficient!
  97   for currite in ${bad_files[*]}; do
  98     if [ "$currite" == "$1" ]; then return 0; fi
  99   done
 100   return 1
 101 }
 102
 103 # checks whether an item is already contained in a list.  the first parameter
 104 # is taken as the item that one wants to add.  the second through n-th
 105 # parameters are taken as the candidate list.  if the item is present, then
 106 # zero is returned to indicate success.  otherwise a non-zero return value
 107 # indicates that the item was not yet present.
 108 function already_listed {
 109   to_find=$1
 110   shift
 111   while (( $# > 0 )); do
 112     # return that we found it if the current item matches.
 113     if [ "$to_find" == "$1" ]; then return 0; fi
 114     shift  # toss next one out.
 115   done
 116   # failed to match it.
 117   return 1
 118 }
 119
 120 # finds the index of a particular element in the remainder of a list.
 121 # the variable __finders_indy will be set to -1 for no match, or it will be the
 122 # index of the element if the item was found.
 123 __finders_indy=-1
 124 function find_in_array {
 125   local to_find=$1
 126 #echo find_in_array needs: $to_find
 127   shift
 128 #echo restargs finder: $*
 129   local indy=0
 130   while (( $# > 0 )); do
 131     # return that we found it if the current item matches.
 132 #echo "find_in_array posn $indy has $1"
 133     if [ "$to_find" == "$1" ]; then
 134 #echo "FOUND $to_find at $indy"
 135        __finders_indy=$indy
 136        return 0
 137     fi
 138     shift  # toss next one out.
 139     indy=$(expr $indy + 1)
 140 #echo "find_in_array indy now $indy "
 141   done
 142   _finders_indy=-1
 143   # failed to match it.
 144   return 1
 145 }
 146
 147 ############################################################################
 148 #
 149 # this variable gets stored into when resolve_filename runs.
 150 declare -a resolve_target_array=()
 151 #
 152 # this variable is used internally by resolve_filename.  it should not need
 153 # to be reset between runs on different files because the source hierarchy
 154 # is not supposed to be getting files deleted or added while the deps are
 155 # being geneated.
 156 declare -a resolve_matches_src=()
 157 declare -a resolve_matches_dest=()
 158 #
 159 # tries to find a filename in the library hierarchy.
 160 function resolve_filename {
 161   local code_file=$1
 162 #echo resolving: $code_file
 163   if [ -f "$code_file" ]; then
 164     # that was pretty easy.
 165     resolve_target_array=($code_file)
 166     return 0
 167   fi
 168 #echo "MUST seek: $code_file"
 169
 170   local dir=$(dirname "$code_file")
 171   local base=$(basename "$code_file")
 172   local src_key="$dir/$base"
 173 #echo "src_key: $src_key"
 174
 175   # see if we can find that element in the previously resolved items.
 176   if find_in_array "$src_key" ${resolve_matches_src[*]}; then
 177     local found_indy=$__finders_indy
 178     resolve_target_array=(${resolve_matches_dest[$found_indy]})
 179 #echo "FOUND \"$src_key\" AT ${resolve_matches_dest[$found_indy]}"
 180     return 0
 181   fi
 182
 183   # reset our global list.
 184   resolve_target_array=()
 185 #echo "HAVING TO FIND: $dir and $base"
 186   if [ -z "$dir" ]; then
 187     resolve_target_array=($(find "$BUILD_TOP" -iname "$base"))
 188   else
 189     resolve_target_array=($(find "$BUILD_TOP" -iname "$base" | grep "$dir.$base"))
 190   fi
 191 #echo resolved to: ${resolve_target_array[*]}
 192 #echo size of resolve array=${#resolve_target_array[*]}
 193   if [ ${#resolve_target_array[*]} -eq 1 ]; then
 194 #echo ADDING a match: $src_key ${resolve_target_array[0]}
 195     # for unique matches, we will store the correspondence so we can look
 196     # it up very quickly later.
 197     resolve_matches_src+=($src_key)
 198     resolve_matches_dest+=(${resolve_target_array[0]})
 199   fi
 200 }
 201 #
 202 ############################################################################
 203
 204 # main function that recurses on files and their dependencies.
 205 # this takes a list of file names to examine.  each one will have its
 206 # dependencies crawled.  we attempt to recurse on as few items as possible
 207 # by making sure we haven't already seen files or decided they're bad.
 208 function recurse_on_deps {
 209   # snag arguments into a list of dependencies to crawl.
 210   local -a active_deps=($*)
 211
 212   # pull off the first dependency so we can get all of its includes.
 213   local first_element="${active_deps[0]}"
 214   active_deps=(${active_deps[*]:1})
 215
 216   # make the best guess we can at the real path.
 217   resolve_filename $first_element
 218   local to_examine="${resolve_target_array[0]}"
 219
 220   # we didn't already have a failure (due to it being a bad file already
 221   # or other problems).  and once we execute the below code to grab the
 222   # file's dependencies, it really is boring and we never want to see it
 223   # again.
 224   boring_files+=($to_examine)
 225
 226 local dirtmp=$(dirname "$to_examine")
 227 local basetmp=$(basename "$to_examine")
 228 echo "dependent on: $(basename "$dirtmp")/$basetmp"
 229 #hmmm: gather the dependencies listed in debugging line above into a
 230 #      list that will be printed out at the end.
 231
 232   ##########################################################################
 233
 234   local current_includes="$(mktemp $TEMPORARIES_DIR/zz_buildor_deps4-$base.XXXXXX)"
 235   rm -f "$current_includes"
 236
 237   local partial_file="$(mktemp $TEMPORARIES_DIR/zz_buildor_deps5-$base.XXXXXX)"
 238   rm -f "$partial_file"
 239
 240   # find all the includes in this file and save to the temp file.
 241   while read -r spoon; do
 242     has_guard="$(echo "$spoon" \
 243         | sed -n -e 's/#ifdef __BUILD_STATIC_APPLICATION__/yep/p')"
 244     if [ ! -z "$has_guard" ]; then
 245       # quit reading when we've seen the start of one of our guards.
 246       break
 247     fi
 248     # if we are okay with the line, save it to the temp file.
 249     echo "$spoon"
 250   done <"$to_examine" >"$partial_file"
 251
 252   grep "^[ $TAB_CHAR]*#include.*" <"$partial_file" >>"$current_includes"
 253
 254   rm "$partial_file"
 255
 256 #echo "grabbing includes from: $to_examine"
 257
 258 #hmmm: could separate the find deps on this file stuff below.
 259
 260   local fp_dir=$(dirname "$to_examine")
 261 #echo fp_dir is: $fp_dir
 262
 263   # iterate across the dependencies we saw and add them to our list if
 264   # we haven't already.
 265   while read -r line_found; do
 266     local chew_toy=$(echo $line_found | sed -e 's/^[ \t]*#include *<\(.*\)>.*$/\1/')
 267     # we want to add the file to the active list before we forgot about it.
 268 #echo A: chew_toy=$chew_toy
 269
 270     # check whether the dependency looks like one of our style of includes.
 271     # if it doesn't have a slash in it, then we need to give it the same
 272     # directory as the file we're working on.
 273     local slash_present=$(echo $chew_toy | sed -n -e 's/.*[\\\/].*/yep/p')
 274
 275     # the replacement above to get rid of #include failed.  try something
 276     # simpler.
 277     if [ ! -z "$(echo $chew_toy | sed -n -e 's/#include/crud/p')" ]; then
 278       # try again with a simpler pattern.
 279       chew_toy=$(echo $line_found | sed -e 's/^[ \t]*#include *[">]\(.*\)[">].*$/\1/')
 280 #echo B: chew_toy=$chew_toy
 281
 282       # if it still has an #include or if it's not really a file, we can't
 283       # use it for anything.
 284       if [ ! -z "$(echo $chew_toy | sed -n -e 's/#include/crud/p')" ]; then
 285         echo "** bad include: $chew_toy"
 286         continue
 287       fi
 288
 289       # we are pretty sure that this file has no path components in it.
 290       # we will add the surrounding directory if possible.
 291       if [ -z "$slash_present" ]; then
 292         if [ -z "$fp_dir" ]; then
 293           # well, now we have no recourse, since we don't know where to
 294           # say this file comes from.
 295           echo "** unknown directory: $chew_toy"
 296         else
 297           # cool, we can rely on the existing directory.
 298           chew_toy="$fp_dir/$chew_toy"
 299 #echo patched dir: $chew_toy
 300         fi
 301       fi
 302     fi
 303
 304     if bad_file $chew_toy; then
 305 #echo C: skipping because on bad list: $chew_toy
 306       continue
 307     fi
 308
 309 ###  # if we've seen it before, we bail.
 310 ###  if seen_already "$to_examine"; then
 311 ###echo bailing since seen before: $to_examine
 312 ###return 0;
 313 ### fi
 314
 315   # now remember that we've seen this file.  we only remember it if
 316     # make sure we can see this file already, or we will need to seek it out.
 317     if [ ! -f "$chew_toy" ]; then
 318       # not an obvious filename yet.  try resolving it.
 319       resolve_filename $chew_toy
 320       declare -a found_odd=(${resolve_target_array[*]})
 321 #echo found-list-is: ${found_odd[*]}
 322       local odd_len=${#found_odd[*]}
 323 #echo odd len is $odd_len
 324       if [ $odd_len -eq 0 ]; then
 325         # whoops.  we couldn't find it.  probably a system header, so toss it.
 326 #echo "** ignoring: $chew_toy"
 327         bad_files+=($chew_toy)
 328         chew_toy=""
 329       elif [ $odd_len -eq 1 ]; then
 330         # there's exactly one match, which is very good.
 331         chew_toy="${found_odd[0]}"
 332 #echo C: chew_toy=$chew_toy
 333       else
 334         # this is really wrong.  there are multiple files with the same name?
 335         # that kind of things makes debugger tools angry or stupid.
 336         echo "** non-unique name: $chew_toy"
 337         bad_files+=($chew_toy)
 338         chew_toy=""
 339       fi
 340     fi
 341
 342     if [ ! -z "$chew_toy" -a ! -f "$chew_toy" ]; then
 343       echo "** failed to compute a real path for: $chew_toy"
 344       bad_files+=($chew_toy)
 345       chew_toy=""
 346       continue
 347     fi
 348
 349     # now if we got something out of our patterns, add it as a file to
 350     # investigate.
 351     if [ ! -z "$chew_toy" ]; then
 352       # add the dependency we found.
 353       if add_new_dep "$chew_toy"; then
 354         # if that worked, it's not existing or bad so we want to keep it.
 355         if ! already_listed "$chew_toy" ${active_deps[*]}; then
 356           # track the file for its own merits also (to squeeze more includes).
 357           active_deps+=($chew_toy)
 358         fi
 359       fi
 360     fi
 361
 362     # now compute the path as if it was the implementation file (x.cpp)
 363     # instead of being a header.  does that file exist?  if so, we'd like
 364     # its dependencies also.
 365     local cpp_toy=$(echo $chew_toy | sed -e 's/^\([^\.]*\)\.h$/\1.cpp/')
 366
 367     # there's no point in adding it if the name didn't change.
 368     if [ "$cpp_toy" != "$chew_toy" ]; then
 369       resolve_filename $cpp_toy
 370 #hmmm: what if too many matches occur?
 371       found_it="${resolve_target_array[0]}"
 372
 373       # if the dependency actually exists, then we'll add it to our list.
 374       if [ ! -z "$found_it" ]; then
 375         if add_new_dep "$found_it"; then
 376           # that was a new dependency, so we'll continue examining it.
 377           if ! already_listed "$found_it" ${active_deps[*]}; then
 378             active_deps+=($found_it)
 379           fi
 380         fi
 381       fi
 382     fi
 383   done <"$current_includes"
 384
 385   rm -f "$current_includes"
 386
 387   # keep going on the list after our modifications.
 388   if [ ${#active_deps[*]} -ne 0 ]; then recurse_on_deps ${active_deps[*]}; fi
 389   return 0
 390 }
 391
 392 # this takes the dependency list and adds it to our current file.
 393 function write_new_version {
 394   local code_file=$1
 395
 396   local opening_guard_line="\n#ifdef __BUILD_STATIC_APPLICATION__\n  // static dependencies found by buildor_gen_deps.sh:"
 397   local closing_guard_line="#endif // __BUILD_STATIC_APPLICATION__\n"
 398
 399 #echo "would write deps to: $code_file"
 400 #echo ${dependency_accumulator[*]}
 401
 402   local replacement_file="$(mktemp $TEMPORARIES_DIR/zz_buildor_deps3.XXXXXX)"
 403
 404   # blanks is a list of blank lines that we save up in between actual content.
 405   # if we don't hold onto them, we can have the effect of "walking" the static
 406   # section down the file as progressively more blanks get added.  we ensure
 407   # that only one is between the last code line and the guarded static chunk.
 408   declare -a blanks=()
 409   # read in our existing file.
 410   while read -r orig_line; do
 411 #echo "read: '$orig_line'"
 412     # if it's the beginning of our static app section, stop reading.
 413     if [ ! -z "$(echo $orig_line \
 414         | sed -n -e 's/#ifdef __BUILD_STATIC_APPLICATION__/yep/p')" ]; then
 415       break
 416     fi
 417     if [ -z "$orig_line" ]; then
 418       # add another blank line to our list and don't print any of them yet.
 419       blanks+=($'\n')
 420     else
 421       # this line is not a blank; send any pending blanks to the file first.
 422       if [ ${#blanks[*]} -ne 0 ]; then
 423         echo -n ${blanks[*]} >>"$replacement_file"
 424       fi
 425       echo "$orig_line" >>"$replacement_file"
 426       # reset our list of blank lines, since we just added them.
 427       blanks=()
 428     fi
 429   done <"$code_file"
 430
 431   echo -e "$opening_guard_line" >>"$replacement_file"
 432
 433   # now accumulate just the dependencies for a bit.
 434   local pending_deps="$(mktemp $TEMPORARIES_DIR/zz_buildor_deps2.XXXXXX)"
 435   rm -f "$pending_deps"
 436
 437   # iterate across all the dependencies we found.
 438   for line_please in ${dependency_accumulator[*]}; do
 439
 440     # throw out any items that are in the same directory we started in.
 441     if [ "$prohibited_directory" == "$(dirname $line_please)" ]; then
 442 #echo "skipping prohibited: $line_please"
 443       continue
 444     fi
 445
 446     # strip the line down to just the filename and single directory component.
 447     local chewed_line=$(echo $line_please | sed -e 's/.*[\\\/]\(.*\)[\\\/]\(.*\)$/\1\/\2/')
 448
 449     if [ ! -z "$(echo $chewed_line | sed -n -e 's/\.h$/yow/p')" ]; then
 450 #echo skipping header file: $chewed_line
 451       continue
 452     fi
 453
 454     local new_include="  #include <$chewed_line>"
 455     echo "$new_include" >>"$pending_deps"
 456 #echo adding "$new_include"
 457   done
 458
 459   sort "$pending_deps" >>"$replacement_file"
 460   rm -f "$pending_deps"
 461
 462   echo -e "$closing_guard_line" >>"$replacement_file"
 463
 464 #echo "about to move replacement, diffs:"
 465 #diff "$replacement_file" "$code_file"
 466 #echo "--------------"
 467 #echo full file:
 468 #cat "$replacement_file"
 469 #echo "--------------"
 470
 471   mv "$replacement_file" "$code_file"
 472 }
 473
 474 function find_dependencies {
 475   local code_file=$1
 476
 477   # initialize our globals.
 478   dependency_accumulator=()
 479   boring_files=()
 480
 481   # start recursing with the first dependency being the file itself.
 482   recurse_on_deps $code_file
 483
 484   # create the new version of the file.
 485   write_new_version "$code_file"
 486 }
 487
 488 # main script starts here.
 489
 490 for curr_parm in $*; do
 491
 492   echo "----------------------------------------------------------------------------"
 493   echo ""
 494
 495   # resets the bad list in between sessions.
 496   bad_files=()
 497 #echo bad_files initial: ${bad_files[*]}
 498
 499   if [ -f "$curr_parm" ]; then
 500     echo "scanning file: $curr_parm"
 501     # get absolute path of the containing directory.
 502     prohibited_directory="$(pwd "$curr_parm")"
 503     # fix our filename to be absolute.
 504     temp_absolute="$prohibited_directory/$(basename "$curr_parm")"
 505     curr_parm="$temp_absolute"
 506 #echo "curr_parm: $curr_parm"
 507     find_dependencies "$curr_parm"
 508   elif [ -d "$curr_parm" ]; then
 509     echo "scanning folder: $curr_parm"
 510     # get absolute path of the containing directory.
 511     prohibited_directory="$(pwd $curr_parm)"
 512     # set the directory to that absolute path.
 513     curr_parm="$prohibited_directory"
 514 #echo "curr_parm: $curr_parm"
 515     outfile="$(mktemp $TEMPORARIES_DIR/zz_buildor_deps1.XXXXXX)"
 516     find "$curr_parm" -iname "*.cpp" >"$outfile"
 517     while read -r line_found; do
 518       if [ $? != 0 ]; then break; fi
 519 #echo "looking at file: $line_found"
 520       find_dependencies "$line_found"
 521     done <"$outfile"
 522     rm -f "$outfile"
 523   else
 524     echo "parameter is not a file or directory: $curr_parm"
 525   fi
 526
 527   echo "ignored: " ${bad_files[*]}
 528
 529   echo ""
 530   echo ""
 531
 532 done
 533
 534