genxref 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381
  1. #!/usr/bin/perl
  2. # $Id: genxref,v 1.7 2006/12/07 04:59:38 reed%reedloden.com Exp $
  3. # genxref.pl -- Finds identifiers in a set of C files using an
  4. # extremely fuzzy algorithm. It sort of works.
  5. #
  6. # Arne Georg Gleditsch <argggh@ifi.uio.no>
  7. # Per Kristian Gjermshus <pergj@ifi.uio.no>
  8. #
  9. #
  10. # This program is free software; you can redistribute it and/or modify
  11. # it under the terms of the GNU General Public License as published by
  12. # the Free Software Foundation; either version 2 of the License, or
  13. # (at your option) any later version.
  14. #
  15. # This program is distributed in the hope that it will be useful,
  16. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. # GNU General Public License for more details.
  19. #
  20. # You should have received a copy of the GNU General Public License
  21. # along with this program; if not, write to the Free Software
  22. # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  23. # TODO: ns/cmd/xfe/src/MozillaApp.h, XFE_MozillaApp
  24. ######################################################################
  25. use lib 'lib';
  26. use integer;
  27. use DB_File;
  28. use strict;
  29. my %fileidx;
  30. my %itype = (('macro', 'M'),
  31. ('typedef', 'T'),
  32. ('struct', 'S'),
  33. ('enum', 'E'),
  34. ('union', 'U'),
  35. ('function', 'F'),
  36. ('funcprot', 'f'),
  37. ('class', 'C'), # (C++)
  38. ('classforw', 'c'), # (C++)
  39. ('var', 'V'),
  40. ('interface', 'I'),
  41. ('reference', 'R'),
  42. );
  43. my @reserved = ('auto', 'break', 'case', 'char', 'const', 'continue',
  44. 'default', 'do', 'double', 'else', 'enum', 'extern',
  45. 'float', 'for', 'goto', 'if', 'int', 'long', 'register',
  46. 'return', 'short', 'signed', 'sizeof', 'static',
  47. 'struct', 'switch', 'typedef', 'union', 'unsigned',
  48. 'void', 'volatile', 'while', 'fortran', 'asm',
  49. 'inline', 'operator',
  50. 'class', # (C++)
  51. # Her bør vi ha flere av disse:
  52. '__inline', # seved
  53. '__asm__','__inline__');
  54. my @reservedJS = ( 'abstract', 'as', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', 'delete', 'do', 'else', 'export', 'extends', 'false', 'final', 'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof', 'is', 'namespace', 'new', 'null', 'package', 'private', 'public', 'return', 'static', 'super', 'switch', 'this ', 'throw', 'true', 'try', 'typeof', 'use', 'var', 'let', 'void', 'while', 'with');
  55. my @reservedXUL = (''); #nothing yet
  56. my @reservedXBL = (''); #nothing yet
  57. my %xref;
  58. my (@ft, @f, @jsfiles, @plfiles, @idlfiles, @xblfiles);
  59. my $ident = '\~?_*[a-zA-Z][a-zA-Z0-9_]*';
  60. my $fnum = 1;
  61. my $plidentp = '[$@%&]';
  62. my ($realpath, $verb, $fix, @extra) = @ARGV;
  63. $realpath ||= '.';
  64. $realpath .= '/';
  65. $verb = 'default' unless defined $verb;
  66. my $suffix = defined $fix ? ".$fix" : '';
  67. my ($totaldefs, $totalrefs) = (0, 0);
  68. sub wash {
  69. my $towash = $_[0];
  70. return ("\n" x ($towash =~ tr/\n//));
  71. }
  72. sub stripodd {
  73. my $tostrip = $_[0];
  74. while ($tostrip =~ s/\{([^\{\}]*)\}/\05$1\06/gs) {}
  75. $tostrip =~ s/[\{\}]//gs; # remove *before* restoring outermost block
  76. $tostrip =~ tr/\05\06/\{\}/;
  77. return($tostrip);
  78. }
  79. sub enumval {
  80. my $enum_conts = $_[0];
  81. $enum_conts =~ s/($ident)([^,]*(?:,|$))/"enum \01$itype{'enum'}$1\02 ;".&wash($2)/ges;
  82. return($enum_conts);
  83. }
  84. sub classes {
  85. my @c = (shift =~ /($ident)\s*(?:$|,)/gm);
  86. if (@c) {
  87. return(join(":", @c)."::");
  88. } else {
  89. return('');
  90. }
  91. }
  92. sub findidentXBL {
  93. print(STDERR "Starting pass 1 for XBL: Collect identifier definitions.\n");
  94. my $start = time;
  95. my $defs = 0;
  96. my $f = "";
  97. my $contents = "";
  98. my @contents;
  99. my $curfnum = 0;
  100. foreach $f (@xblfiles) {
  101. $fileidx{++$fnum} = $f;
  102. ++$curfnum;
  103. open(SRCFILE, $realpath.$f);
  104. {
  105. local $/ = undef;
  106. $contents = <SRCFILE>;
  107. }
  108. close(SRCFILE);
  109. print(STDERR
  110. "(Pass 1 XBL) $f (",length($contents),
  111. "), file $curfnum of ",$#xblfiles+1,"...\n");
  112. # XBL
  113. next unless $contents =~ /<!DOCTYPE\s+bindings/ms;
  114. # Remove comments.
  115. # Magic
  116. $contents =~ s/<\?([^?]*(?:\?[^?>]|))*\?>/&wash($1)/ges;
  117. # XML Comment
  118. $contents =~ s/^<!--(?:\w+?\n)-->/&wash($1)/ges;
  119. # Preprocessor Comment
  120. $contents =~ s/^\s*\#[^\n]*//g;
  121. # From here on, \01 and \02 are used to encapsulate found
  122. # identifiers,
  123. my ($ct, $ct2, $input);
  124. $ct = $input = $contents;
  125. $contents = '';
  126. my $ct2 = ($ct =~ s/\n//g) || 0;
  127. my ($ids, $junk, $tailjunk, $junkkind);
  128. my (@tagstack, @tagattrs);
  129. # <tag...>
  130. # </tag>
  131. my ($class, $func);
  132. my $l = 1;
  133. while ($input =~ s{(^.*?<)(/?)(\w+)([^>]*)>}{}ms) {
  134. my ($pre, $close, $tag, $rest) = ($1, $2, $3, $4);
  135. $l += ($pre =~ s/\n//g) || 0;
  136. if ($close) {
  137. pop @tagstack;
  138. pop @tagattrs;
  139. for ($tag) {
  140. /^binding$/ && do {
  141. $class = undef;
  142. last;
  143. };
  144. /^(?:property|method|field)$/ && do {
  145. $func = undef;
  146. last;
  147. };
  148. }
  149. next;
  150. }
  151. my %attrs = ();
  152. push @tagstack, $tag;
  153. push @tagattrs, \%attrs;
  154. my %attrlines = ();
  155. my $attrline = 0;
  156. while ($rest =~ /(\s+)(\S+)(\s*=\s*)(?:"([^"]*)"|'([^']*)')/g) {
  157. my ($pre, $attr, $val) = ($1.$3, $2.$5, $4.$6);
  158. $attrline += ($pre =~ s/\n//g) || 0;
  159. $attrs{$attr} = $val;
  160. $attrlines{$attr} = $attrline;
  161. $attrline += ($val =~ s/\n//g) || 0;
  162. }
  163. for ($tag) {
  164. /^(?:content|children|resources|body|handlers)$/ && do {
  165. last;
  166. };
  167. /^binding$/ && do {
  168. # Binding (class) declarations
  169. if (defined $attrs{'id'}) {
  170. $class = $attrs{'id'};
  171. $contents = $contents . "\04$l\01" .
  172. $itype{'classforw'} .
  173. $class .
  174. "\02";
  175. }
  176. last;
  177. };
  178. /^implementation$/ && do {
  179. # Implementation (interface) derivations
  180. if (defined $attrs{'implements'}) {
  181. $contents .=
  182. "\04$l\01" .
  183. $itype{'class'} .
  184. &classes($attrs{'implements'}) .
  185. "$class\02";
  186. }
  187. last;
  188. };
  189. /^(de|con)structor$/ && do {
  190. if (defined $class) {
  191. $contents .=
  192. "\04$l\01" .
  193. $itype{'function'} .
  194. &classes($class) .
  195. ($1 eq 'de' ? '~' : '') .
  196. $class .
  197. "\02";
  198. }
  199. last;
  200. };
  201. /^([gs])etter$/ && do {
  202. my $prefix = $1;
  203. $func =~ /^(.)(.*)/;
  204. # as an experiment, this declares both the flavors
  205. $contents .=
  206. "\04$l\01" .
  207. $itype{'function'} .
  208. $prefix . 'et' . uc($1) . $2 .
  209. "\02";
  210. last;
  211. };
  212. /^(?:property|method|field)$/ && do {
  213. if (defined $attrs{'name'}) {
  214. $func = $attrs{'name'};
  215. $contents .=
  216. "\04$l\01" .
  217. $itype{'function'} .
  218. $func .
  219. "\02";
  220. foreach my $attr (qw(onget onset)) {
  221. if (defined $attrs{$attr}) {
  222. $attr =~ /on([gs])et/;
  223. my $prefix = $1;
  224. $func =~ /(.)(.*)/;
  225. $contents .=
  226. "\04".($l+$attrlines{$attr})."\01" .
  227. $itype{'function'} .
  228. $prefix . 'et' . uc($1) . $2 .
  229. "\02";
  230. }
  231. }
  232. }
  233. last;
  234. };
  235. /^handler$/ && do {
  236. if (defined $attrs{'event'}) {
  237. $func = $attrs{'event'};
  238. $contents .=
  239. "\04$l\01" .
  240. $itype{'function'} .
  241. $func .
  242. "\02";
  243. $func = "on$func";
  244. $contents .=
  245. "\04$l\01" .
  246. $itype{'function'} .
  247. $func .
  248. "\02";
  249. }
  250. last;
  251. };
  252. /^parameter$/ && do {
  253. if (defined $attrs{'name'}) {
  254. $contents .=
  255. "\04$l\01" .
  256. $itype{'reference'} .
  257. $attrs{'name'} .
  258. "\02";
  259. }
  260. last;
  261. };
  262. }
  263. $l += $attrline;
  264. }
  265. foreach ($contents =~ /^(.*)/gm) {
  266. while (/\04(\d+)\01(.)($ident)\02/go) {
  267. $xref{$3} .= "$2$fnum:$1\t";
  268. $defs++;
  269. }
  270. }
  271. }
  272. # Cleanup.
  273. foreach (@reservedJS) {
  274. delete($xref{$_});
  275. }
  276. $totaldefs = $totaldefs + $defs;
  277. print(STDERR
  278. "Completed pass 1 XBL (",(time-$start),"s):",
  279. " $defs definitions found (total found so far: $totaldefs).\n\n");
  280. }
  281. sub findidentPL {
  282. print(STDERR "Starting pass 1 for Perl: Collect identifier definitions.\n");
  283. my $start = time;
  284. my $defs = 0;
  285. my $f = "";
  286. my $contents = "";
  287. my @contents;
  288. my $curfnum = 0;
  289. foreach $f (@plfiles) {
  290. $fileidx{++$fnum} = $f;
  291. ++$curfnum;
  292. open(SRCFILE, $realpath.$f);
  293. {
  294. local $/ = undef;
  295. $contents = <SRCFILE>;
  296. }
  297. close(SRCFILE);
  298. print(STDERR
  299. "(Pass 1 Perl) $f (",length($contents),
  300. "), file $curfnum of ",$#plfiles+1,"...\n");
  301. # Remove comments.
  302. $contents =~ s/(\n)=(\w+?\n.*?)=cut/&wash($1 . $2)/ges; # Perl Pod
  303. $contents =~ s/\#[^\n]*//g; # Perl Comment
  304. # Unwrap continuation lines.
  305. $contents =~ s/\\\s*\n/\05/gs;
  306. while ($contents =~ s/\05([^\n\05]+)\05/$1\05\05/gs) {}
  307. $contents =~ s/(\05+)([^\n]*)/"$2"."\n" x length($1)/gse;
  308. # Remove nested parentheses.
  309. while ($contents =~ s/\(([^\)]*)\(/\($1\05/g ||
  310. $contents =~ s/\05([^\(\)]*)\)/ $1 /g) {}
  311. # Some heuristics here: Try to recognize "code" and delete
  312. # everything up to the next block delimiter.
  313. # $contents =~ s/([\;\}\{])(\s*$ident\s*\([^\)]*\)[^\{\}]*)/
  314. # "$1".&wash($2)/goes;
  315. # $contents =~ s/([\;\{])(\s*\**$ident\s*\=[^\{\}]*)/
  316. # "$1".&wash($2)/goes;
  317. # Parentheses containing commas are probably not interesting.
  318. $contents =~ s/\(([^\)]*\,[^\)]*)\)/
  319. "()".&wash($1)/ges;
  320. # Ranges are uninteresting (and confusing).
  321. $contents =~ s/\[(.*?)\]/&wash($1)/ges;
  322. # From here on, \01 and \02 are used to encapsulate found
  323. # identifiers,
  324. my $ct = $contents;
  325. my $ct2 = ($ct =~ s/\n//g) || 0;
  326. my ($ids, $junk, $tailjunk, $junkkind);
  327. @contents = split(/[;}]/, $contents);
  328. $contents = '';
  329. my $l = 1;
  330. foreach (@contents) {
  331. my $line = $ct = $_;
  332. $ct2 = ($ct =~ s/\n//g) || 0;
  333. my $bl = $l;
  334. # Package (class) declarations
  335. if ($line =~ /^(\s*(?:package)\s+(?:\s*$ident\s*::)*\s*)($ident)(\s*)$/m) {
  336. ($ct, $ids, $tailjunk) = ($1, $2, $3);
  337. $l += ($ct =~ s/\n//g) || 0;
  338. $contents .= "\04$l\01".$itype{'class'}."$ids\02";
  339. }
  340. $l = $bl;
  341. # Variable (var) declarations
  342. if ($line =~ /^(\s*(?:my|local|our)\s+)($plidentp)($ident(?:\s*,\s*$plidentp$ident)*)(.*?)$/m) {
  343. ($ct, $ids, $tailjunk, $junkkind) = ($1, $3, $4, $2);
  344. $l += ($ct =~ s/\n//g) || 0;
  345. my @idds = split /\b/, $ids;
  346. while (@idds) {
  347. my $lid = shift @idds;
  348. $lid =~ /($ident)/;
  349. $contents .= "\04$l\01".$itype{'var'}."$1\02";
  350. my $spc;
  351. do {
  352. $spc = shift @idds;
  353. $l += $spc =~ s/[\r\n]//g;
  354. } while ($spc =~ /[,=]/);
  355. }
  356. }
  357. $l = $bl;
  358. # Function (function) declarations
  359. if ($line =~ /^(.*?\s*\b)sub(\s+)($ident|)(\s*.*)$/sm) {
  360. my ($impl, $ws1, $ws2, $ws3) = ($3, $1, $2, $4);
  361. $l += $ws1 =~ s/\n//g;
  362. $l += $ws2 =~ s/\n//g;
  363. $contents .= "\04$l\01".$itype{'function'}."$impl\02" if $impl;
  364. $l += $ws3 =~ s/\n//g;
  365. }
  366. $l = $bl;
  367. # constant property () declarations
  368. =useless
  369. if ($line =~ /($ident)\s*=>/m) {
  370. }
  371. $l = $bl;
  372. =cut
  373. $l += $ct2;
  374. }
  375. foreach ($contents =~ /^(.*)/gm) {
  376. while (/\04(\d+)\01(.)($ident)\02/go) {
  377. $xref{$3} .= "$2$fnum:$1\t";
  378. $defs++;
  379. }
  380. }
  381. }
  382. # Cleanup.
  383. foreach (@reservedJS) {
  384. delete($xref{$_});
  385. }
  386. $totaldefs = $totaldefs + $defs;
  387. print(STDERR
  388. "Completed pass 1 Perl (",(time-$start),"s):",
  389. " $defs definitions found (total found so far: $totaldefs).\n\n");
  390. }
  391. sub findidentJS {
  392. print(STDERR "Starting pass 1 for Javascript: Collect identifier definitions.\n");
  393. my $start = time;
  394. my $defs = 0;
  395. my $f = "";
  396. my $contents = "";
  397. my @contents;
  398. my $curfnum = 0;
  399. foreach $f (@jsfiles) {
  400. $fileidx{++$fnum} = $f;
  401. ++$curfnum;
  402. open(SRCFILE, $realpath.$f);
  403. {
  404. local $/ = undef;
  405. $contents = <SRCFILE>;
  406. }
  407. close(SRCFILE);
  408. print(STDERR
  409. "(Pass 1 JS) $f (",length($contents),
  410. "), file $curfnum of ",$#jsfiles+1,"...\n");
  411. # Remove comments.
  412. $contents =~ s{/\*(.*?)\*/}{&wash($1)}ges;
  413. $contents =~ s{//[^\n]*}{}g; # C++
  414. # $contents =~ s{^# [^\n]*|^#($)}{$1}gm; # evil preprocessor
  415. # Remove license blocks.
  416. $contents =~ s{(# \*\*\*\*\* BEGIN LICENSE BLOCK \*\*\*\*\*.*?# \*\*\*\*\* END LICENSE BLOCK \*\*\*\*\*)}{&wash($1)}ges;
  417. # Unwrap continuation lines.
  418. $contents =~ s/\\\s*\n/\05/gs;
  419. while ($contents =~ s/\05([^\n\05]+)\05/$1\05\05/gs) {}
  420. $contents =~ s/(\05+)([^\n]*)/"$2"."\n" x length($1)/gse;
  421. # Remove nested parentheses.
  422. # while ($contents =~ s/\(([^\)]*)\(/\($1\05/g ||
  423. # $contents =~ s/\05([^\(\)]*)\)/ $1 /g) {}
  424. # Some heuristics here: Try to recognize "code" and delete
  425. # everything up to the next block delimiter.
  426. # $contents =~ s/([\;\}\{])(\s*$ident\s*\([^\)]*\)[^\{\}]*)/
  427. # "$1".&wash($2)/goes;
  428. # $contents =~ s/([\;\{])(\s*\**$ident\s*\=[^\{\}]*)/
  429. # "$1".&wash($2)/goes;
  430. # Parentheses containing commas are probably not interesting.
  431. # $contents =~ s/\(([^\)]*\,[^\)]*)\)/
  432. # "()".&wash($1)/ges;
  433. # This operator-stuff messes things up. (C++)
  434. $contents =~ s/operator[\<\>\=\!\+\-\*\%\/]{1,2}/operator/g;
  435. # Ranges are uninteresting (and confusing).
  436. $contents =~ s/\[(.*?)\]/&wash($1)/ges;
  437. # From here on, \01 and \02 are used to encapsulate found
  438. # identifiers,
  439. =pod
  440. # Find class definitions. (C++)
  441. $contents =~ s/((class)\s+($ident)\s*(:[^;\{]*|)({}|(;)))/
  442. "$2 "."\01".$itype{$2.($6 ? 'forw' : '')}.
  443. &classes($4).$3."\02 ".$6.&wash($1)/goes;
  444. =cut
  445. my $pre;
  446. my $ct = $contents;
  447. my $ct2 = ($ct =~ s/\n//g) || 0;
  448. my ($ids, $junk);
  449. @contents = split(/[;}]/, $contents);
  450. $contents = '';
  451. my $l = 1;
  452. foreach (@contents) {
  453. my $line = $ct = $_;
  454. $ct2 = ($ct =~ s/\n//g) || 0;
  455. if ($line && $line =~ /(var|let|const)|(function|[gs]et)/) {
  456. my ($var_let_const, $function_get_set) = ($1, $2);
  457. my $bl = $l;
  458. if ($var_let_const &&
  459. $line =~ /^(.*\b(?:var|let|const)\s+)($ident(?:\s*(?:=[^,;]*|),\s*$ident)*)(.*?(?:\n|$))/s) {
  460. ($pre, $ids, $junk) = ($1, $2, $3);
  461. $l += ($pre =~ s/\n//g) || 0;
  462. my @idds = split /\b/, $ids;
  463. while (@idds) {
  464. my $lid = shift @idds;
  465. $lid =~ /($ident)/;
  466. $contents .= "\04$l\01".$itype{'var'}."$1\02";
  467. my $spc;
  468. do {
  469. $spc = shift @idds;
  470. $l += $spc =~ s/[\r\n]//g;
  471. } while ($spc =~ /[,=]/);
  472. }
  473. $l = $bl;
  474. }
  475. if ($function_get_set &&
  476. $line =~ /^(.*\n|)(.*?\s*)((?:$ident\s*[:=]*\s*)*)(?:function|[gs]et)(\s+)($ident|)(\s*.*)(\n|$)/s) {
  477. my ($decl, $impl, $ws1, $ws2, $ws3, $ws0) = ($3, $5, $2, $4, $6, $1);
  478. $l += $ws0 =~ s/\n//g;
  479. $l += $ws1 =~ s/\n//g;
  480. if ($decl) {
  481. my @idds = split /\b/, $decl;
  482. while (@idds) {
  483. my $lid = shift @idds;
  484. if ($lid =~ /($ident)/) {
  485. $contents .= "\04$l\01".$itype{'function'}."$1\02";
  486. } else {
  487. $l += $lid =~ s/\n//g;
  488. }
  489. }
  490. }
  491. $l += $ws2 =~ s/\n//g;
  492. $contents .= "\04$l\01".$itype{'function'}."$impl\02" if $impl;
  493. $l += $ws3 =~ s/\n//g;
  494. $l = $bl;
  495. }
  496. }
  497. $l += $ct2;
  498. }
  499. foreach ($contents =~ /^(.*)/gm) {
  500. while (/\04(\d+)\01(.)($ident)\02/go) {
  501. $xref{$3} .= "$2$fnum:$1\t";
  502. $defs++;
  503. }
  504. }
  505. }
  506. # Cleanup.
  507. foreach (@reservedJS) {
  508. delete($xref{$_});
  509. }
  510. $totaldefs = $totaldefs + $defs;
  511. print(STDERR
  512. "Completed pass 1 Javascript (",(time-$start),"s):",
  513. " $defs definitions found (total found so far: $totaldefs).\n\n");
  514. }
  515. sub c_clean {
  516. my $contents = $_[0];
  517. # We don't care so much about proper style,
  518. # from our perspective, most operators are equivalent to ,
  519. $contents =~ s{[-+<>=%&|?^]+}{,}g;
  520. # Find macro (un)definitions.
  521. my $l = 0;
  522. my $defs;
  523. foreach ($contents =~ /^(.*)/gm) {
  524. $l++;
  525. if (/^[ \t]*\#\s*(define|undef)\s+($ident)/o) {
  526. $xref{$2} .= "$itype{'macro'}$fnum:$l\t";
  527. $defs++;
  528. }
  529. }
  530. # We want to do some funky heuristics with preprocessor blocks
  531. # later, so mark them. (FIXME: #elif)
  532. $contents =~ s/^[ \t]*\#\s*if.*/\01/gm;
  533. $contents =~ s/^[ \t]*\#\s*else.*/\02/gm;
  534. $contents =~ s/^[ \t]*\#\s*endif.*/\03/gm;
  535. # Strip all preprocessor directives.
  536. $contents =~ s/^[ \t]*\#(.*)//gm;
  537. # Now, remove all odd block markers ({,}) we find inside
  538. # #else..#endif blocks. (And pray they matched one in the
  539. # preceding #if..#else block.)
  540. while ($contents =~ s/\02([^\01\02\03]*\03)/&stripodd($1)/ges ||
  541. $contents =~ s/\01([^\01\02\03]*)\03/&stripodd($1)/ges) {}
  542. while ($contents =~ /([\01\02\03\04\05])/gs) {
  543. print(STDERR "\t ** stray ".($1 eq "\01"
  544. ? "#if"
  545. : ($1 eq "\02"
  546. ? "#else"
  547. : ($1 eq "\03"
  548. ? "#endif"
  549. : "control sequence"
  550. )
  551. )
  552. )." found.\n");
  553. }
  554. $contents =~ s/[\01\02\03\04\05]//gs;
  555. # Special treatment of enum and struct: inhibit removal of such blocks
  556. $contents =~ s/(\s+enum\s+)($ident|)(\s*)\{([^\}]*)\}(\s*)($ident|)/
  557. "$1 ".($2 ? "\01".$itype{enum}.$2."\02 " : "").$3.&enumval($4).$5.($6 ? "\01".$itype{enum}.$6."\02 " : "")/goes;
  558. $contents =~ s/(\s+struct\s+)($ident|)(\s*)\{([^\}]*)\}(\s*)($ident|)/
  559. $1.$2.$3."{}".$4.$5.($6 ? "\01".$itype{typedef}.$6."\02 " : "")/goes;
  560. =pod
  561. # Remove all but outermost blocks. (No local variables.)
  562. while ($contents =~ s/\{([^\{\}]*)\}/
  563. "\05".&wash($1)/ges) {}
  564. $contents =~ s/\05/\{\}/gs;
  565. =cut
  566. # This operator-stuff messes things up. (C++)
  567. $contents =~ s/operator[\<\>\=\!\+\-\*\%\/]{1,2}/operator/g;
  568. # Ranges are uninteresting (and confusing).
  569. $contents =~ s/\[(.*?)\]/&wash($1)/ges;
  570. # And so are assignments.
  571. $contents =~ s/\=(.*?);/";".&wash($1)/ges;
  572. return $contents;
  573. }
  574. sub java_clean {
  575. my $contents = $_[0];
  576. while ($contents =~ s/(\{[^\{]*)\{([^\{\}]*)\}/
  577. $1."\05".&wash($2)/ges) {}
  578. $contents =~ s/\05/\{\}/gs;
  579. # Remove imports
  580. $contents =~ s/(^\s*import.*;)/&wash($1)/gem;
  581. # Remove packages
  582. $contents =~ s/(^\s*package.*;)/&wash($1)/gem;
  583. return $contents;
  584. }
  585. sub c_classes {
  586. my $contents = $_[0];
  587. # XXX this has issues w/ whitespace
  588. # Find struct, enum and union definitions with params (function returning struct etc)
  589. $contents =~ s/((struct|enum|union)\s+($ident|)\s+($ident|)\s*([{;]))/
  590. "$2 ".($3 ? "\01".$itype{$2}.$3."\02 " : " ").$4.$5.&wash($1)/gmoes;
  591. # Find struct, enum and union definitions.
  592. $contents =~ s/((struct|enum|union)\s+($ident|)\s*([{;]))/
  593. "$2 ".($3 ? "\01".$itype{$2}.$3."\02 " : "").$4.&wash($1)/gmoes;
  594. # Find class definitions. (C++)
  595. $contents =~ s/((class)\s+($ident)\s*(:[^;\{]*|)([{;]))/
  596. "$2 "."\01".$itype{$2.($5 eq ';' ? 'forw' : '')}.
  597. &classes($4).$3."\02 ".$5.&wash($1)/gmoes;
  598. return $contents;
  599. }
  600. sub java_classes {
  601. my $contents = $_[0];
  602. # Find Java classes
  603. $contents =~ s/((class)\s+($ident)\s*(extends\s+([\.\w]+)\s*|)(implements\s+([\.\w]+)|))/
  604. "$2 "."\01".$itype{$2}.&classes($5.", ".$7).$3."\02 ".
  605. &wash($1)/goes;
  606. # Find Java interfaces
  607. $contents =~ s/((interface)\s+($ident)\s*(extends\s+([\.\w]+)|))/
  608. "$2 "."\01".$itype{$2}.&classes($5).$3."\02 ".&wash($1)/goes;
  609. return $contents;
  610. }
  611. sub idl_decl {
  612. return 'NS_DECL_'.(uc shift);
  613. }
  614. sub idl_interfaces {
  615. my $contents = $_[0];
  616. # Find IDL interfaces
  617. $contents =~ s/((interface)\s+($ident)\s*(:[^;\{]*|)(\{|(;)))/
  618. "$2 ".($6 ? '' : "\01".$itype{macro}.idl_decl($3)."\02 ").
  619. "\01".$itype{$2.($6 ? 'forw' : '')}.
  620. &classes($4).$3."\02 ".$6.&wash($1)/goes;
  621. return $contents;
  622. }
  623. sub findidentIDL {
  624. print(STDERR "Starting pass 1 for IDL: Collect identifier definitions.\n");
  625. my $start = time;
  626. my $defs = 0;
  627. my $f = "";
  628. my $contents = "";
  629. my @contents;
  630. my $curfnum = 0;
  631. foreach $f (@idlfiles) {
  632. $fileidx{++$fnum} = $f;
  633. ++$curfnum;
  634. open(SRCFILE, $realpath.$f);
  635. $_ = $/; undef($/); $contents = <SRCFILE>; $/ = $_;
  636. close(SRCFILE);
  637. print(STDERR
  638. "(Pass 1 IDL) $f (",length($contents),
  639. "), file $curfnum of ",$#idlfiles+1,"...\n");
  640. # Remove comments.
  641. $contents =~ s/\/\*(.*?)\*\//&wash($1)/ges;
  642. $contents =~ s/\/\/[^\n]*//g; # C++
  643. # Remove annotations
  644. # XXX uuids should eventually be stored somehow
  645. $contents =~ s/\[[^]]*\]//gs;
  646. # Unwrap continuation lines.
  647. $contents =~ s/\\\s*\n/\05/gs;
  648. while ($contents =~ s/\05([^\n\05]+)\05/$1\05\05/gs) {}
  649. $contents =~ s/(\05+)([^\n]*)/"$2"."\n" x length($1)/gse;
  650. $contents = c_clean($contents);
  651. # Remove nested parentheses.
  652. while ($contents =~ s/\(([^\)]*)\(/\($1\05/g ||
  653. $contents =~ s/\05([^\(\)]*)\)/ $1 /g) {}
  654. # From here on, \01 and \02 are used to encapsulate found
  655. # identifiers,
  656. $contents = idl_interfaces($contents);
  657. @contents = split(/[;\}]/, $contents);
  658. $contents = '';
  659. foreach (@contents) {
  660. # readonly attribute
  661. # const
  662. # s/^(\s*)(struct|enum|union|inline)/$1/;
  663. # we don't care about [noscript] and similar friends
  664. # they're too complicated
  665. s/\[([^]]*)\]/&wash($1)/ge;
  666. if (/$ident[^a-zA-Z0-9_]+$ident/) { # It takes two, baby.
  667. my $t = /^\s*typedef/s; # Is this a type definition?
  668. s/((readonly\s+|)attribute\s+|)
  669. # ($1) readonly
  670. # ($2) attribute
  671. (\s*$ident\s*)
  672. # ($3) return or attribute type
  673. ($ident)
  674. # ($4) Match the identifier
  675. ([\s\)]*
  676. # ($5) Tokens allowed after identifier
  677. (\([^\)]*\)
  678. # ($6) Function parameters?
  679. |) # No function parameters
  680. (\s*(?:$|,)))/
  681. # ($7) Allowed termination chars.
  682. &wash($3)."\01".
  683. # identifier marker
  684. ($t
  685. # if type definition...
  686. ? $itype{'typedef'}
  687. # ..mark as such
  688. : ($6
  689. # $6 is empty unless function definition.
  690. ? $itype{'funcprot'}
  691. # function prototype.
  692. : $itype{'var'}
  693. # Variable.
  694. )
  695. )."$4\02 ".&wash($5 . $7)/goesx;
  696. }
  697. $contents .= $_;
  698. }
  699. my $l = 0;
  700. foreach ($contents =~ /^(.*)/gm) {
  701. $l++;
  702. while (/\01(.)(?:([^\01\02]+?)\s*::\s*|)($ident)\02/go) {
  703. $xref{$3} .= "$1$fnum:$l".($2 ? ":$2" : "")."\t";
  704. $defs++;
  705. }
  706. }
  707. }
  708. # Remove reserved from xref
  709. foreach (@reserved) {
  710. delete($xref{$_});
  711. }
  712. $totaldefs = $totaldefs + $defs;
  713. print(STDERR
  714. "Completed pass 1 IDL (",(time-$start),"s):",
  715. " $defs definitions found (total found so far: $totaldefs).\n\n");
  716. }
  717. sub findident {
  718. print(STDERR "Starting pass 1 for C/C++: Collect identifier definitions.\n");
  719. my $start = time;
  720. my $defs = 0;
  721. my $f = "";
  722. my $contents = "";
  723. my @contents;
  724. my $curfnum = 0;
  725. foreach $f (@f) {
  726. my ($java) = ($ft[$fnum] == 1);
  727. $fileidx{++$fnum} = $f;
  728. ++$curfnum;
  729. open(SRCFILE, $realpath.$f);
  730. $_ = $/; undef($/); $contents = <SRCFILE>; $/ = $_;
  731. close(SRCFILE);
  732. print(STDERR
  733. "(Pass 1 C/C++) $f (",length($contents),
  734. "), file $curfnum of ",$#f+1,"...");
  735. # Remove comments.
  736. $contents =~ s/\/\*(.*?)\*\//&wash($1)/ges;
  737. $contents =~ s/\/\/[^\n]*//g; # C++
  738. # Unwrap continuation lines.
  739. $contents =~ s/\\\n/\05/gs;
  740. while ($contents =~ s/\05([^\n\05]+)\05/$1\05\05/gs) {}
  741. $contents =~ s/(\05+)([^\n]*)/"$2"."\n" x length($1)/gse;
  742. if ($java) {
  743. $contents = java_clean($contents);
  744. } else {
  745. $contents = c_clean($contents);
  746. }
  747. # Remove nested parentheses.
  748. while ($contents =~ s/\(([^\)]*)\(/\($1\05/g ||
  749. $contents =~ s/\05([^\(\)]*)\)/ $1 /g) {}
  750. # Some heuristics here: Try to recognize "code" and delete
  751. # everything up to the next block delimiter.
  752. # $contents =~ s/([\;\}\{])(\s*$ident\s*\([^\)]*\)[^\{\}]*)/
  753. # "$1".&wash($2)/goes;
  754. # $contents =~ s/([\;\{])(\s*\**$ident\s*\=[^\{\}]*)/
  755. # "$1".&wash($2)/goes;
  756. # Parentheses containing commas are probably not interesting.
  757. $contents =~ s/\(([^\)]*\,[^\)]*)\)/
  758. "()".&wash($1)/ges;
  759. # From here on, \01 and \02 are used to encapsulate found
  760. # identifiers,
  761. if ($java) {
  762. $contents = java_classes($contents);
  763. } else {
  764. $contents = c_classes($contents);
  765. }
  766. @contents = split(/[;\}]/, $contents);
  767. $contents = '';
  768. foreach (@contents) {
  769. if (!$java) {
  770. my $t = /\n\s*typedef/s; # Is this a type definition?
  771. s/(\n\s*)(?:typedef|struct|enum|union|inline|static|__inline)/$1/g;
  772. if (/$ident[^a-zA-Z0-9_]+$ident/) { # It takes two, baby.
  773. s/($ident(?:\s*::\s*$ident|)) # ($1) Match the identifier
  774. ([\s\)]* # ($2) Tokens allowed after identifier
  775. (\([^\)]*\) # ($3) Function parameters?
  776. (?:\s*:[^\{]*|) # inheritage specification (C++)
  777. |) # No function parameters
  778. \s*($|,|\{))/ # ($4) Allowed termination chars.
  779. "\01". # identifier marker
  780. ($t # if type definition...
  781. ? $itype{'typedef'} # ..mark as such
  782. : ($3 # $3 is empty unless function definition.
  783. ? ($4 eq '{' # Terminating token indicates
  784. ? $itype{'function'} # function or
  785. : $itype{'funcprot'}) # function prototype.
  786. : $itype{'var'}) # Variable.
  787. )."$1\02 ".&wash($2)/goesx;
  788. }
  789. } else {
  790. s/($ident)\s*\([^\)]*\)([^\{]*)($|\{)/
  791. "\01".($3 eq '{' ? $itype{'function'} : $itype{'funcprot'})."$1\02 ".
  792. &wash($2)/goesx; # capture what's between the identifier & the '{' - it may contain newlines!
  793. s/($ident)\s*(=.*)$/
  794. "\01".$itype{'var'}."$1\02 ".&wash($2)/goesx;
  795. }
  796. $contents .= $_;
  797. }
  798. my $l = 0;
  799. my $ldefs = 0;
  800. foreach ($contents =~ /^(.*)/gm) {
  801. $l++;
  802. while (/\01(.)(?:(.+?)\s*::\s*|)($ident)\02/go) {
  803. $xref{$3} .= "$1$fnum:$l".($2 ? ":$2" : "")."\t";
  804. $defs++;
  805. $ldefs++;
  806. }
  807. }
  808. print(STDERR " $ldefs defs\n");
  809. }
  810. # Remove reserved from xref
  811. foreach (@reserved) {
  812. delete($xref{$_});
  813. }
  814. $totaldefs = $totaldefs + $defs;
  815. print(STDERR
  816. "Completed pass 1 C/C++ (",(time-$start),"s):",
  817. " $defs definitions found (total found so far: $totaldefs).\n\n");
  818. }
  819. sub findusagePL {
  820. print(STDERR "Starting pass 2 Perl: Generate reference statistics.\n");
  821. my $start = time;
  822. my $refs = 0;
  823. my $f;
  824. my $curfnum = 0;
  825. foreach $f (@plfiles) {
  826. ++$fnum;
  827. ++$curfnum;
  828. my $lcount = 0;
  829. my %tref = ();
  830. open(SRCFILE, $realpath.$f);
  831. $_ = $/; undef($/); my $contents = <SRCFILE>; $/ = $_;
  832. close(SRCFILE);
  833. print(STDERR
  834. "(Pass 2 Perl) $f (",length($contents),
  835. "), file $curfnum of ",$#plfiles+1,"...\n");
  836. # Remove comments.
  837. $contents =~ s/(\n)=(\w+?\n.*?)=cut/&wash($1 . $2)/ges; # Perl Pod
  838. $contents =~ s/\#[^\n]*//g; # Perl Comment
  839. # FIXME: "var"
  840. my @lines = split(/\n/, $contents);
  841. my $line;
  842. foreach $line (@lines) {
  843. $lcount++;
  844. foreach ($line =~ /(?:^|[^a-zA-Z_\#]|$plidentp)($ident)\b/og) {
  845. $tref{$_} .= "$lcount," if $xref{$_};
  846. }
  847. }
  848. while (($a, $b) = each(%tref)) {
  849. chop($b);
  850. $xref{$a} .= "R$fnum:$b\t";
  851. $refs++;
  852. }
  853. }
  854. $totalrefs = $totalrefs + $refs;
  855. print(STDERR
  856. "Completed pass 2 (",(time-$start),"s):",
  857. " $refs references to known identifiers found (total: $totalrefs).\n\n");
  858. }
  859. sub findusageJS {
  860. print(STDERR "Starting pass 2 Javascript: Generate reference statistics.\n");
  861. my $start = time;
  862. my $refs = 0;
  863. my $f;
  864. my $curfnum = 0;
  865. foreach $f (@jsfiles) {
  866. ++$fnum;
  867. ++$curfnum;
  868. my $lcount = 0;
  869. my %tref = ();
  870. open(SRCFILE, $realpath.$f);
  871. $_ = $/; undef($/); my $contents = <SRCFILE>; $/ = $_;
  872. close(SRCFILE);
  873. print(STDERR
  874. "(Pass 2 JS) $f (",length($contents),
  875. "), file $curfnum of ",$#jsfiles+1,"...\n");
  876. # Remove comments
  877. $contents =~ s/\/\*(.*?)\*\//&wash($1)/ges;
  878. $contents =~ s/\/\/[^\n]*//g;
  879. # Remove license blocks.
  880. $contents =~ s{(# \*\*\*\*\* BEGIN LICENSE BLOCK \*\*\*\*\*.*?# \*\*\*\*\* END LICENSE BLOCK \*\*\*\*\*)}{&wash($1)}ges;
  881. # FIXME: "var"
  882. my @lines = split(/\n/, $contents);
  883. my $line;
  884. foreach $line (@lines) {
  885. $lcount++;
  886. foreach ($line =~ /(?:^|[^a-zA-Z_\#])($ident)\b/og) {
  887. $tref{$_} .= "$lcount," if $xref{$_};
  888. }
  889. }
  890. while (($a, $b) = each(%tref)) {
  891. chop($b);
  892. $xref{$a} .= "R$fnum:$b\t";
  893. $refs++;
  894. }
  895. }
  896. $totalrefs = $totalrefs + $refs;
  897. print(STDERR
  898. "Completed pass 2 (",(time-$start),"s):",
  899. " $refs references to known identifiers found (total: $totalrefs).\n\n");
  900. }
  901. sub findusageIDL {
  902. print(STDERR "Starting pass 2 IDL: Generate reference statistics.\n");
  903. my $start = time;
  904. my $refs = 0;
  905. my $f;
  906. my $curfnum = 0;
  907. foreach $f (@idlfiles) {
  908. ++$fnum;
  909. ++$curfnum;
  910. my $lcount = 0;
  911. my %tref = ();
  912. open(SRCFILE, $realpath.$f);
  913. $_ = $/; undef($/); my $contents = <SRCFILE>; $/ = $_;
  914. close(SRCFILE);
  915. print(STDERR
  916. "(Pass 2 IDL) $f (",length($contents),
  917. "), file $curfnum of ",$#f+1,"...\n");
  918. =ignore
  919. # Remove comments
  920. $contents =~ s/\/\*(.*?)\*\//&wash($1)/ges;
  921. $contents =~ s/\/\/[^\n]*//g;
  922. # Remove include statements
  923. $contents =~ s/^[ \t]*\#include[ \t]+[^\n]*//gm;
  924. # FIXME: "var"
  925. my @lines = split(/\n/, $contents);
  926. my $line;
  927. foreach $line (@lines) {
  928. $lcount++;
  929. foreach ($line =~ /(?:^|[^a-zA-Z_\#])($ident)\b/og) {
  930. $tref{$_} .= "$lcount," if $xref{$_};
  931. }
  932. }
  933. while (($a, $b) = each(%tref)) {
  934. chop($b);
  935. $xref{$a} .= "R$fnum:$b\t";
  936. $refs++;
  937. }
  938. =cut
  939. }
  940. $totalrefs = $totalrefs + $refs;
  941. print(STDERR
  942. "Completed pass 2 IDL (",(time-$start),"s):",
  943. " $refs references to known identifiers found (total: $totalrefs).\n\n");
  944. }
  945. sub findusage {
  946. print(STDERR "Starting pass 2 C/C++: Generate reference statistics.\n");
  947. my $start = time;
  948. my $refs = 0;
  949. my $f;
  950. my $curfnum = 0;
  951. foreach $f (@f) {
  952. ++$fnum;
  953. ++$curfnum;
  954. my $lcount = 0;
  955. my %tref = ();
  956. open(SRCFILE, $realpath.$f);
  957. $_ = $/; undef($/); my $contents = <SRCFILE>; $/ = $_;
  958. close(SRCFILE);
  959. print(STDERR
  960. "(Pass 2 C/C++) $f (",length($contents),
  961. "), file $curfnum of ",$#f+1,"...\n");
  962. # Remove comments
  963. $contents =~ s/\/\*(.*?)\*\//&wash($1)/ges;
  964. $contents =~ s/\/\/[^\n]*//g;
  965. # Remove include statements
  966. $contents =~ s/^[ \t]*\#include[ \t]+[^\n]*//gm;
  967. # FIXME: "var"
  968. my @lines = split(/\n/, $contents);
  969. my $line;
  970. foreach $line (@lines) {
  971. $lcount++;
  972. foreach ($line =~ /(?:^|[^a-zA-Z_\#])($ident)\b/og) {
  973. $tref{$_} .= "$lcount," if $xref{$_};
  974. }
  975. }
  976. while (($a, $b) = each(%tref)) {
  977. chop($b);
  978. $xref{$a} .= "R$fnum:$b\t";
  979. $refs++;
  980. }
  981. }
  982. $totalrefs = $totalrefs + $refs;
  983. print(STDERR
  984. "Completed pass 2 C/C++ (",(time-$start),"s):",
  985. " $refs references to known identifiers found (total: $totalrefs).\n\n");
  986. }
  987. sub dumpdb {
  988. print STDERR "Starting stage 3: Dump database to disk.\n";
  989. my $start = time;
  990. my %xrefdb;
  991. tie (%xrefdb, "DB_File" , "xref.out.$$", O_RDWR|O_CREAT, 0664, $DB_HASH)
  992. || die("Could not open \"xref\" for writing");
  993. my $i = 0;
  994. my $k;
  995. my $v;
  996. while (($k, $v) = each(%xref)) {
  997. $i++;
  998. delete($xref{$k});
  999. $xrefdb{$k} = $v;
  1000. unless ($i % 100) {
  1001. print(STDERR "(Pass 3) identifier $i of maximum $totaldefs...\n");
  1002. }
  1003. }
  1004. untie(%xrefdb);
  1005. rename("xref.out.$$", "xref$suffix") || die "Couldn't rename xref.out.$$ to xref$suffix";
  1006. print(STDERR
  1007. "Completed stage 3 (",(time-$start),"s):",
  1008. " Information on $i identifiers dumped to disk.\n\n");
  1009. dbmclose(%fileidx);
  1010. rename("fileidx.out.$$", "fileidx$suffix")
  1011. || die "Couldn't rename fileidx.out.$$ to fileidx$suffix";
  1012. }
  1013. sub renumber {
  1014. my ($line, $fadjust) = @_;
  1015. # $xref{$3} .= "$2$fnum:$1\t";
  1016. my @refs = split /\t/, $line;
  1017. $line = '';
  1018. foreach my $v (@refs) {
  1019. $v =~ /(.)(\d+):(.*)/;
  1020. my ($kind, $fileno, $lineno) = ($1, $fadjust + $2, $3);
  1021. $line .= "$kind$fileno:$lineno\t";
  1022. }
  1023. return $line;
  1024. }
  1025. sub merge {
  1026. print STDERR "Starting stage 4: Merging database to disk.\n";
  1027. my $start = time;
  1028. tie (%fileidx, "DB_File", "fileidx.out.$$", O_RDWR|O_CREAT, 0660, $DB_HASH)
  1029. || die("Could not open \"fileidx.out.$$\" for writing");
  1030. my %xrefdb;
  1031. tie (%xrefdb, "DB_File" , "xref.out.$$", O_RDWR|O_CREAT, 0664, $DB_HASH)
  1032. || die("Could not open \"xref\" for writing");
  1033. my ($i, $k, $v);
  1034. my $fadjust = 0;
  1035. my $limit = scalar @extra;
  1036. for (my $e = 0; $e < $limit; ++$e) {
  1037. my $fix = $extra[$e];
  1038. my ($prefix, $suffix) = ("$fix/", ".$fix");
  1039. my %idx;
  1040. tie (%idx, "DB_File", "fileidx$suffix", O_RDONLY, undef, $DB_HASH)
  1041. || die("Could not open \"fileidx$suffix\" for reading");
  1042. my $f = -1;
  1043. while (($k, $v) = each(%idx)) {
  1044. $f = $k if ($k > $f);
  1045. $fileidx{$k + $fadjust} = "$prefix$v";
  1046. unless ($i % 100) {
  1047. print(STDERR "(Pass 4) file $i [$fix] of $e / $limit files...\n");
  1048. }
  1049. }
  1050. untie (%idx);
  1051. tie (%xref, "DB_File" , "xref$suffix", O_RDONLY, undef, $DB_HASH)
  1052. || die("Could not open \"xref$suffix\" for reading");
  1053. while (($k, $v) = each(%xref)) {
  1054. $i++;
  1055. $xrefdb{$k} .= renumber($v, $fadjust);
  1056. unless ($i % 100) {
  1057. print(STDERR "(Pass 4) identifier $i [$fix] of $e / $limit files...\n");
  1058. }
  1059. }
  1060. untie (%xref);
  1061. $fadjust += $f + 1;
  1062. }
  1063. untie(%xrefdb);
  1064. rename("xref.out.$$", "xref") || die "Couldn't rename xref.out.$$ to xref";
  1065. print(STDERR
  1066. "Completed stage 4 (",(time-$start),"s):",
  1067. "Information on $i identifiers dumped to disk.\n\n");
  1068. untie(%fileidx);
  1069. rename("fileidx.out.$$", "fileidx")
  1070. || die "Couldn't rename fileidx.out.$$ to fileidx";
  1071. foreach my $fix (@extra) {
  1072. my ($suffix) = (".$fix");
  1073. }
  1074. }
  1075. sub buildList {
  1076. my ($fspre, $fspost) = ('', '');
  1077. if ($suffix ne '') {
  1078. $fspost = "! -type l ! -path '*/.git/*' ! -path '*/.hg/*' ! -path '*/.svn/*' ! -path '*/CVS/*'";
  1079. } else {
  1080. $fspre = '-L';
  1081. }
  1082. open(FILES, "find $fspre $realpath $fspost -type f -print |");
  1083. print(STDERR "Starting pass 0: Checking for files to index.\n",
  1084. "Looking in $realpath.\n");
  1085. while (my $file = <FILES>) {
  1086. $file =~ s/^\Q$realpath\E|\s+$//og;
  1087. # Duplicated in lib/LXR/Common.pm
  1088. if ($file =~ /\.(?:(hh?|cpp?|c[cs]?|fin|tbl|ipdlh?)|(java)|(jsm?)(?:\.in|)|(p[lm]|cgi)(?:\.in|)|(idl)|(xml))$/i) {
  1089. if ($1) {
  1090. push @ft, 0;
  1091. push @f, $file;
  1092. } elsif ($2) {
  1093. push @ft, 1;
  1094. push @f, $file;
  1095. } elsif ($3) {
  1096. if (($file =~ m!/Regress/!) || ($file =~ m!kraken/tests/kraken!)) {
  1097. print(STDERR "Skipping $file\n");
  1098. } else {
  1099. push @jsfiles, $file;
  1100. }
  1101. } elsif ($4) {
  1102. push @plfiles, $file;
  1103. } elsif ($5) {
  1104. push @idlfiles, $file;
  1105. } elsif ($6) {
  1106. push @xblfiles, $file;
  1107. }
  1108. }
  1109. }
  1110. close FILES;
  1111. }
  1112. # Stage 0: build file list
  1113. # Stage 1: find identifiers
  1114. # - this is pass 1 over the file content
  1115. # Stage 2: find references
  1116. # - this is pass 2 over the file content
  1117. # Stage 3: dump the database
  1118. # - we run from 0..3 for a "default" action
  1119. # Stage 4: merge databases
  1120. # - we only do this stage for a "merge" action
  1121. sub buildIndex {
  1122. tie (%fileidx, "DB_File", "fileidx.out.$$", O_RDWR|O_CREAT, 0660, $DB_HASH)
  1123. || die("Could not open \"fileidx.out.$$\" for writing");
  1124. buildList();
  1125. print 'Stage 0 IDL file count is : ' . scalar(@idlfiles) . '
  1126. Stage 0 C/C++ file count is : ' . scalar(@f) . '
  1127. Stage 0 JS file count is : ' . scalar(@jsfiles) . '
  1128. Stage 0 PL file count is : ' . scalar(@plfiles) . '
  1129. ';
  1130. $fnum = 0;
  1131. # we need to search for IDL identifiers first because they can be
  1132. # the only prototype
  1133. # XXX i'm not certain this is necessary, oh well.
  1134. &findidentIDL;
  1135. print "Stage 1 IDL XREF keycount is : " . scalar(keys %xref) . "\n";
  1136. &findident;
  1137. print "Stage 1 C/C++ XREF keycount is : " . scalar(keys %xref) . "\n";
  1138. &findidentJS;
  1139. print "Stage 1 C/C++/JS XREF keycount is : " . scalar(keys %xref) . "\n";
  1140. &findidentPL;
  1141. print "Stage 1 C/C++/JS/PL XREF keycount is : " . scalar(keys %xref) . "\n";
  1142. &findidentXBL;
  1143. print "Stage 1 C/C++/JS/PL/XBL XREF keycount is : " . scalar(keys %xref) . "\n";
  1144. $fnum = 0;
  1145. &findusageIDL;
  1146. print "Stage 2 IDL XREF keycount is : " . scalar(keys %xref) . "\n";
  1147. &findusage;
  1148. print "Stage 2 C/C++ XREF keycount is : " . scalar(keys %xref) . "\n";
  1149. &findusageJS;
  1150. print "Stage 2 C/C++/JS XREF keycount is : " . scalar(keys %xref) . "\n";
  1151. &findusagePL;
  1152. print "Stage 2 C/C++/JS/PL keycount is : " . scalar(keys %xref) . "\n";
  1153. &dumpdb;
  1154. }
  1155. for ($verb) {
  1156. /^default$/ && do {
  1157. # Build database for directory.
  1158. # Files are parsed using whole file regexp's.
  1159. # This is SLOW.
  1160. buildIndex();
  1161. last;
  1162. };
  1163. /^merge$/ && do {
  1164. # Merge databases
  1165. unshift @extra, $fix;
  1166. merge();
  1167. last;
  1168. };
  1169. }