Browse Source

Bug 608196 Incremental genxref implementation: unit at a time

14 years ago
parent
commit
1f3f2fae75
3 changed files with 378 additions and 88 deletions
  1. 147 0
      compare-dir-trees.pl
  2. 179 86
      genxref
  3. 52 2
      update-xref.pl

+ 147 - 0
compare-dir-trees.pl

@@ -0,0 +1,147 @@
+#!/usr/bin/perl -w
+#
+# ./compare-dir-trees.pl /first/path /second/path
+#
+# returns 0 if both paths have the same elements.
+# an element is not the same as another element if it is a different kind of
+# object (link, directory, file).
+# a directory is not the same as another directory if it has different children.
+# a link is not the same as another link if the link data is different.
+# a normal file is not the same as another file if their contents differ.
+#
+# returns a non zero value if any of the above do not hold.
+
+my ($left, $right) = @ARGV;
+my $verbose = 0;
+
+sub debug {
+  return unless $verbose;
+  my ($debug) = @_;
+  print STDERR $debug . "\n";
+}
+
+sub ensure_d {
+  my ($dir) = @_;
+  return 0 if -d $dir;
+  debug "$dir does not exist!";
+  exit -2;
+}
+
+sub compare_items {
+  my ($l, $lf, $r, $rf) = @_;
+  if ($lf eq $rf) {
+    return ("$l/$lf", "$r/$rf");
+  }
+  debug "directory contents mismatch for $l - $r: $lf - $rf";
+  exit 1;
+}
+
+sub compare_dirs {
+  my ($l, $r) = @_;
+  my ($l_fail, $r_fail) = (0, 0);
+  $l_fail = 1 unless opendir(LEFT, $l);
+  $r_fail = 1 unless opendir(RIGHT, $r);
+  unless ($l_fail == $r_fail) {
+    debug "$l-$l_fail did not match $r-$r_fail!";
+    exit 1;
+  }
+  return if $l_fail;
+  my (@llinks, @rlinks, @ldirs, @rdirs, @lfiles, @rfiles);
+  {
+    my @names = sort readdir(LEFT);
+    foreach my $i (@names) {
+      next if $i eq '.';
+      next if $i eq '..';
+      if (-l "$l/$i") {
+        push @llinks, $i;
+      } elsif (-d "$l/$i") {
+        push @ldirs, $i;
+      } else {
+        push @lfiles, $i
+      }
+    }
+    closedir LEFT;
+  }
+  {
+    my @names = sort readdir(RIGHT);
+    foreach my $i (@names) {
+      next if $i eq '.';
+      next if $i eq '..';
+      if (-l "$r/$i") {
+        push @rlinks, $i;
+      } elsif (-d "$r/$i") {
+        push @rdirs, $i;
+      } else {
+        push @rfiles, $i
+      }
+    }
+    closedir RIGHT;
+  }
+  my ($lc, $rc) = (scalar @llinks, scalar @rlinks);
+  unless ($lc == $rc) {
+    debug "link count mismatch $l / $r";
+    exit 1;
+  }
+  {
+    for (my $i = 0; $i < $lc; ++$i) {
+      my ($lfile, $rfile) = compare_items($l, $llinks[$i], $r, $rlinks[$i]);
+      $llink = readlink $lfile;
+      $rlink = readlink $rfile;
+      if ($llink ne $rlink) {
+        debug "$lfile($llink) does not match $rfile($rlink)";
+        exit 1;
+      }
+    }
+  }
+  ($lc, $rc) = (scalar @lfiles, scalar @rfiles);
+  unless ($lc == $rc) {
+    debug "file count mismatch $l / $r";
+    exit 1;
+  }
+  {
+    for (my $i = 0; $i < $lc; ++$i) {
+      my ($lfile, $rfile) = compare_items($l, $lfiles[$i], $r, $rfiles[$i]);
+      system('cmp', '-s', $lfile, $rfile);
+      if ($? == -1) {
+        debug "failed to execute: $!";
+        exit 1;
+      }
+      if ($? & 127) {
+        debug "cmp died!";
+        exit 1;
+      }
+      if ($? >> 8) {
+        debug "$lfile does not match $rfile";
+        exit 1;
+      }
+    }
+  }
+  ($lc, $rc) = (scalar @ldirs, scalar @rdirs);
+  unless ($lc == $rc) {
+    debug "dir count mismatch $l / $r";
+    exit 1;
+  }
+  {
+    for (my $i = 0; $i < $lc; ++$i) {
+      my ($lfile, $rfile) = compare_items($l, $ldirs[$i], $r, $rdirs[$i]);
+      compare_dirs($lfile, $rfile);
+    }
+  }
+}
+
+sub main {
+  debug qq!Comparing: "$left" "$right"
+!;
+
+  if ($left eq $right) {
+    debug "paths are actually the same!";
+    exit 0;
+  }
+
+  ensure_d($left);
+  ensure_d($right);
+  compare_dirs($left, $right);
+  exit 0;
+}
+
+main();

+ 179 - 86
genxref

@@ -63,30 +63,23 @@ my @reserved = ('auto', 'break', 'case', 'char', 'const', 'continue',
 my @reservedJS = ( 'abstract',   'as',   'break',   'case',   'catch',   'class',   'const',   'continue',  'default',   'delete',   'do',   'else',   'export',   'extends',   'false',   'final',   'finally',   'for',   'function',   'if',   'import',   'in',   'instanceof',   'is',   'namespace',   'new',   'null',   'package',   'private',   'public',   'return',   'static',   'super',   'switch',   'this ',  'throw',   'true',   'try',   'typeof',   'use',   'var',   'let',    'void',   'while',   'with');
 
 my @reservedXUL = (''); #nothing yet
-
 my @reservedXBL = (''); #nothing yet
 
-my @ft;
-
 my %xref;
 
-my @f;
-my @jsfiles;
-my @plfiles;
-my @idlfiles;
-my @xblfiles;
+my (@ft, @f, @jsfiles, @plfiles, @idlfiles, @xblfiles);
 
 my $ident = '\~?_*[a-zA-Z][a-zA-Z0-9_]*';
 my $fnum = 1;
 
 my $plidentp = '[$@%&]';
-my $realpath = $ARGV[0];
+my ($realpath, $verb, $fix, @extra) = @ARGV;
 $realpath ||= '.';
 $realpath .= '/';
+$verb = 'default' unless defined $verb;
+my $suffix = defined $fix ? ".$fix" : '';
 
-
-my $totaldefs = 0;
-my $totalrefs = 0;
+my ($totaldefs, $totalrefs) = (0, 0);
 
 sub wash {
     my $towash = $_[0];
@@ -1177,15 +1170,13 @@ sub findusage {
 	  " $refs references to known identifiers found (total: $totalrefs).\n\n");
 }
 
-
 sub dumpdb {
-    print STDERR "Starting pass 3: Dump database to disk.\n";
+    print STDERR "Starting stage 3: Dump database to disk.\n";
 
     my $start = time;
     my %xrefdb;
-
     tie (%xrefdb, "DB_File" , "xref.out.$$", O_RDWR|O_CREAT, 0664, $DB_HASH)
-	|| die("Could not open \"xref\" for writing");
+        || die("Could not open \"xref\" for writing");
 
     my $i = 0;
     my $k;
@@ -1200,81 +1191,183 @@ sub dumpdb {
     }
 
     untie(%xrefdb);
-    rename("xref.out.$$", "xref") || die "Couldn't rename xref.out.$$ to xref";
+    rename("xref.out.$$", "xref$suffix") || die "Couldn't rename xref.out.$$ to xref$suffix";
     print(STDERR
-	  "Completed pass 3 (",(time-$start),"s):",
+	  "Completed stage 3 (",(time-$start),"s):",
 	  " Information on $i identifiers dumped to disk.\n\n");
+
+    dbmclose(%fileidx);
+
+    rename("fileidx.out.$$", "fileidx$suffix")
+        || die "Couldn't rename fileidx.out.$$ to fileidx$suffix";
 }
 
+sub renumber {
+    my ($line, $fadjust) = @_;
+#                $xref{$3} .= "$2$fnum:$1\t";
+    my @refs = split /\t/, $line;
+    $line = '';
+    foreach my $v (@refs) {
+       $v =~ /(.)(\d+):(.*)/;
+       my ($kind, $fileno, $lineno) = ($1, $fadjust + $2, $3);
+       $line .= "$kind$fileno:$lineno\t";
+    }
+    return $line;
+}
+
+sub merge {
+    print STDERR "Starting stage 4: Merging database to disk.\n";
+
+    my $start = time;
+    tie (%fileidx, "DB_File", "fileidx.out.$$", O_RDWR|O_CREAT, 0660, $DB_HASH)
+        || die("Could not open \"fileidx.out.$$\" for writing");
+
+    my %xrefdb;
+    tie (%xrefdb, "DB_File" , "xref.out.$$", O_RDWR|O_CREAT, 0664, $DB_HASH)
+        || die("Could not open \"xref\" for writing");
+
+    my ($i, $k, $v);
+    my $fadjust = 0;
+    my $limit = scalar @extra;
+    for (my $e = 0; $e < $limit; ++$e) {
+        my $fix = $extra[$e];
+        my ($prefix, $suffix) = ("$fix/", ".$fix");
+        my %idx;
+        tie (%idx, "DB_File", "fileidx$suffix", O_RDONLY, undef, $DB_HASH)
+            || die("Could not open \"fileidx$suffix\" for reading");
+        my $f = -1;
+        while (($k, $v) = each(%idx)) {
+            $f = $k if ($k > $f);
+            $fileidx{$k + $fadjust} = "$prefix$v";
+            unless ($i % 100) {
+                print(STDERR "(Pass 4) file $i [$fix] of $e / $limit files...\n");
+            }
+        }
+        untie (%idx);
+
+        tie (%xref, "DB_File" , "xref$suffix", O_RDONLY, undef, $DB_HASH)
+            || die("Could not open \"xref$suffix\" for reading");
 
-tie (%fileidx, "DB_File", "fileidx.out.$$", O_RDWR|O_CREAT, 0660, $DB_HASH)
-    || die("Could not open \"fileidx.out.$$\" for writing");
-
-
-open(FILES, "find -L $realpath -type f -print |");
-print(STDERR "Starting pass 0: Checking for files to index.\n");
-print(STDERR "looking in $realpath.\n");
-
-
-while (my $file = <FILES>) {
-    $file =~ s/^\Q$realpath\E|\s+$//og;
-    #   Duplicated in lib/LXR/Common.pm
-    if ($file =~ /\.(?:(hh?|cpp?|c[cs]?|fin|tbl)|(java)|(jsm?)(?:\.in|)|(p[lm]|cgi)(?:\.in|)|(idl)|(xml))$/i) {
-        if ($1) {
-            push @ft, 0;
-            push @f, $file;
-        } elsif ($2) {
-            push @ft, 1;
-            push @f, $file;
-        } elsif ($3) {
-            push @jsfiles, $file;
-        } elsif ($4) {
-            push @plfiles, $file;
-        } elsif ($5) {
-            push @idlfiles, $file;
-        } elsif ($6) {
-            push @xblfiles, $file;
+        while (($k, $v) = each(%xref)) {
+            $i++;
+            $xrefdb{$k} .= renumber($v, $fadjust);
+            unless ($i % 100) {
+                print(STDERR "(Pass 4) identifier $i [$fix] of $e / $limit files...\n");
+            }
         }
+        untie (%xref);
+        $fadjust += $f + 1;
     }
+    untie(%xrefdb);
+    rename("xref.out.$$", "xref") || die "Couldn't rename xref.out.$$ to xref";
+    print(STDERR
+          "Completed stage 4 (",(time-$start),"s):",
+          "Information on $i identifiers dumped to disk.\n\n");
+    untie(%fileidx);
+    rename("fileidx.out.$$", "fileidx")
+        || die "Couldn't rename fileidx.out.$$ to fileidx";
+    foreach my $fix (@extra) {
+        my ($suffix) = (".$fix");
+    }
+}
+
+sub buildList {
+    my ($fspre, $fspost) = ('', '');
+    if ($suffix ne '') {
+       $fspost = '! -type l';
+    } else {
+       $fspre = '-L';
+    }
+    open(FILES, "find $fspre $realpath $fspost -type f -print |");
+    print(STDERR "Starting pass 0: Checking for files to index.\n",
+                 "Looking in $realpath.\n");
+
+    while (my $file = <FILES>) {
+        $file =~ s/^\Q$realpath\E|\s+$//og;
+        #   Duplicated in lib/LXR/Common.pm
+       if ($file =~ /\.(?:(hh?|cpp?|c[cs]?|fin|tbl)|(java)|(jsm?)(?:\.in|)|(p[lm]|cgi)(?:\.in|)|(idl)|(xml))$/i) {
+           if ($1) {
+               push @ft, 0;
+               push @f, $file;
+           } elsif ($2) {
+               push @ft, 1;
+               push @f, $file;
+           } elsif ($3) {
+               push @jsfiles, $file;
+           } elsif ($4) {
+               push @plfiles, $file;
+           } elsif ($5) {
+               push @idlfiles, $file;
+           } elsif ($6) {
+               push @xblfiles, $file;
+           }
+        }
+    }
+    close FILES;
+}
+
+# Stage 0: build file list
+# Stage 1: find identifiers
+# - this is pass 1 over the file content
+# Stage 2: find references
+# - this is pass 2 over the file content
+# Stage 3: dump the database
+# - we run from 0..3 for a "default" action
+# Stage 4: merge databases
+# - we only do this stage for a "merge" action
+sub buildIndex {
+    tie (%fileidx, "DB_File", "fileidx.out.$$", O_RDWR|O_CREAT, 0660, $DB_HASH)
+        || die("Could not open \"fileidx.out.$$\" for writing");
+
+    buildList();
+    print 'Stage 0 IDL file count is : ' . scalar(@idlfiles) . '
+Stage 0 C/C++ file count is : ' . scalar(@f) . '
+Stage 0 JS file count is : ' . scalar(@jsfiles) . '
+Stage 0 PL file count is : ' . scalar(@plfiles) . '
+';
+
+    $fnum = 0;
+
+    # we need to search for IDL identifiers first because they can be
+    # the only prototype
+    # XXX i'm not certain this is necessary, oh well.
+    &findidentIDL;
+    print "Stage 1 IDL XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findident;
+    print "Stage 1 C/C++ XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findidentJS;
+    print "Stage 1 C/C++/JS XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findidentPL;
+    print "Stage 1 C/C++/JS/PL XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findidentXBL;
+    print "Stage 1 C/C++/JS/PL/XBL XREF keycount is : " . scalar(keys %xref) . "\n";
+
+    $fnum = 0;
+
+    &findusageIDL;
+    print "Stage 2 IDL XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findusage;
+    print "Stage 2 C/C++ XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findusageJS;
+    print "Stage 2 C/C++/JS XREF keycount is : " . scalar(keys %xref) . "\n";
+    &findusagePL;
+    print "Stage 2 C/C++/JS/PL keycount is : " . scalar(keys %xref) . "\n";
+
+    &dumpdb;
+}
+
+for ($verb) {
+    /^default$/ && do {
+        # Build database for directory.
+        # Files are parsed using whole file regexp's.
+        # This is SLOW.
+        buildIndex();
+        last;
+    };
+    /^merge$/ && do {
+        # Merge databases
+        unshift @extra, $fix;
+        merge();
+        last;
+    };
 }
-close FILES;
-
-print "Stage 0 IDL file count is : " . scalar(@idlfiles) . "\n";
-print "Stage 0 C/C++ file count is : " . scalar(@f) . "\n";
-print "Stage 0 JS file count is : " . scalar(@jsfiles) . "\n";
-print "Stage 0 PL file count is : " . scalar(@plfiles) . "\n";
-
-
-$fnum = 0;
-
-# we need to search for IDL identifiers first because they can be
-# the only prototype
-# XXX i'm not certain this is necessary, oh well.
-&findidentIDL;
-print "Stage 1 IDL XREF keycount is : " . scalar(keys %xref) . "\n";
-&findident;
-print "Stage 1 C/C++ XREF keycount is : " . scalar(keys %xref) . "\n";
-&findidentJS;
-print "Stage 1 C/C++/JS XREF keycount is : " . scalar(keys %xref) . "\n";
-&findidentPL;
-print "Stage 1 C/C++/JS/PL XREF keycount is : " . scalar(keys %xref) . "\n";
-&findidentXBL;
-print "Stage 1 C/C++/JS/PL/XBL XREF keycount is : " . scalar(keys %xref) . "\n";
-
-$fnum = 0;
-
-&findusageIDL;
-print "Stage 2 IDL XREF keycount is : " . scalar(keys %xref) . "\n";
-&findusage;
-print "Stage 2 C/C++ XREF keycount is : " . scalar(keys %xref) . "\n";
-&findusageJS;
-print "Stage 2 C/C++/JS XREF keycount is : " . scalar(keys %xref) . "\n";
-&findusagePL;
-print "Stage 2 C/C++/JS/PL keycount is : " . scalar(keys %xref) . "\n";
-
-&dumpdb;
-
-dbmclose(%fileidx);
-
-rename("fileidx.out.$$", "fileidx")
-    || die "Couldn't rename fileidx.out.$$ to fileidx";

+ 52 - 2
update-xref.pl

@@ -20,6 +20,8 @@ uptime
 );
 =cut
 
+my $DEBUGGER = '';
+
 my @paths=qw(
 /usr/local/bin
 /opt/local/bin
@@ -34,6 +36,7 @@ my $STDERRTODEVNUL = '2>/dev/null';
 my $ERROR_OUTPUT = $STDERRTOSTDOUT;
 
 my $TREE;
+my $by_unit = 0;
 
 sub do_mkdir {
   my $dir = shift;
@@ -49,11 +52,17 @@ sub process_args {
     $TREE = shift;
     if ($TREE) {
       if ($TREE eq '-cron') {
+        # run from a cron script, silence error output
         $was_arg = 1;
         $TIME = $UPTIME = '';
         $ERROR_OUTPUT = $STDERRTODEVNUL;
+      } elsif ($TREE eq '--by-unit') {
+        # index each top level directory individually and then merge
+        $was_arg = 1;
+        $by_unit = 1;
+      } else {
+        $TREE =~ s{/$}{};
       }
-      $TREE =~ s{/$}{};
     }
   } while ($TREE && $was_arg);
 }
@@ -136,7 +145,48 @@ chdir $db_tmp_dir || die "can't change to $db_tmp_dir";
 
 #XXX what does |set -e| mean?
 #system ("set -e >> $log");
-if (system("$TIME $lxr_dir/genxref $src_dir >> $log $ERROR_OUTPUT") == 0) {
+my $success = 0;
+if ($by_unit) {
+  chdir $src_dir;
+  my @dirs = sort <*>;
+  chdir $db_tmp_dir;
+  my ($othertree, $otherpath, $skipdb) = ('', '', '');
+  if ($TREE =~ /^(.*)-(?:.*?)$/) {
+    $othertree = $1;
+    $otherpath = $Conf->{'treehash'}{$othertree};
+    for my $tree (keys %{$Conf->{'treehash'}}) {
+      my $path = $Conf->{'treehash'}{$tree};
+      if ($otherpath eq $path) {
+        $skipdb = "$db_dir/../$tree/tmp";
+        last if -d $skipdb;
+      }
+      $skipdb = '';
+    }
+    unless ($otherpath && -d $otherpath && -d $skipdb) {
+      ($othertree, $otherpath, $skipdb) = ('', '', '');
+    }
+  }
+
+  foreach my $dir (@dirs) {
+    my $skip = 0;
+    if ($otherpath) {
+      $skip = 1 if system("$lxr_dir/compare-dir-trees.pl", "$src_dir/$dir", "$otherpath/$dir") == 0;
+    }
+    if ($skip) {
+      foreach my $file ("$skipdb/fileidx.$dir", "$skipdb/xref.$dir") {
+        if (-f $file) {
+          system('cp', '-lf', $file, '.');
+        }
+      }
+    } else {
+      $success = system("$TIME $DEBUGGER $lxr_dir/genxref $src_dir/$dir default $dir >> $log $ERROR_OUTPUT") == 0;
+    }
+  }
+  $success = system("$TIME $DEBUGGER $lxr_dir/genxref $src_dir merge ".join(' ',@dirs)." >> $log $ERROR_OUTPUT") == 0;
+} else {
+  $success = system("$TIME $DEBUGGER $lxr_dir/genxref $src_dir >> $log $ERROR_OUTPUT") == 0;
+}
+if ($success) {
   if (system("chmod", "-R", "a+r", $db_tmp_dir)) {
     die "chmod failed";
   }