#!/usr/bin/perl -w
use strict;

# ---------------------------------------------------------------------------
# File-scoped state shared by the main script and the subs further down.
# ---------------------------------------------------------------------------

# %graph   : edge table, keyed "word#topic"; the stored number selects how
#            (and whether) the edge is written to the dot file.
# %printed : node names that already received a node statement.
my (%graph, %printed);

# Node attribute string and the current input file name.
my ($nodeformat, $file);

# Character-class fragments from which the link regex is assembled.
my ($UpperLetter, $LowerLetter, $AnyLetter);

# Regex source strings: bare link, capturing link, optional quote delimiter.
my ($LpA, $LinkPattern, $QDelim);

# Scratch variables used while reading a page.
my ($topic, $line);
my (@files, @words);


# Letter classes.  Besides ASCII they include the high bytes \xc0-\xff
# (presumably ISO-8859-1 accented letters -- confirm the input encoding).
$UpperLetter = "[A-Z\xc0-\xde]";
$LowerLetter = "[a-z\xdf-\xff]";
# "Any" letter additionally admits underscore and digits.
$AnyLetter   = "[A-Za-z\xc0-\xff_0-9]";

# Main link pattern: lowercase between uppercase, then anything
$LpA = join '', $UpperLetter, '+', $LowerLetter, '+', $UpperLetter, $AnyLetter, '*';

# Optional quote delimiter (not in output)
$QDelim = '(?:"")?';

# Final pattern: the link itself is capture group 1, followed by the
# optional, non-capturing delimiter.
$LinkPattern = "($LpA)" . $QDelim;


# Attribute list appended to every node statement written to graph.dot.
$nodeformat='[shape=box, fontsize=144]';


# NOTE(review): this value is not read anywhere in the visible code -- the
# foreach below uses $file as its loop variable -- confirm before removing.
$file="LarpWiki.txt";

# Collect the page files with Perl's built-in glob instead of shelling out
# to `ls`: no external process, no stderr noise when nothing matches, and
# no mis-splitting of unusual file names.  An empty directory simply yields
# an empty list, so the loop below is skipped.
@files = glob 'files/*.txt';

# Scan every page, echoing its path to STDOUT as a progress indicator.
foreach $file (@files) {
  print "$file\n";
  getwikiwords($file);
}


# Write the collected edges to graph.dot in Graphviz "dot" syntax.
# The bareword handle G is kept because nodeprint() writes to it as well.
open G, '>', 'graph.dot' or die "error opening graph.dot: $!";
print G "digraph G {\nsize=\"7,10\"\npage=\"8.27,11.69\"\nratio=\"fill\"\n";

# Keys are visited in sorted order so the generated file is the same from
# run to run (plain hash order is not stable between Perl processes).
foreach my $edge (sort keys %graph) {
  my $kind = $graph{$edge};

  # Only values 1 (one-way) and 2 (two-way) are drawn; any other value is
  # skipped.
  next unless $kind == 1 || $kind == 2;

  # Keys have the form "word#topic".  Split at the FIRST '#': the word part
  # can never contain one, whereas the topic comes from a file name and
  # might.  A key without '#' falls back to using the whole key for both
  # ends, as the previous regex-based code did.
  my ($from, $to) = split /#/, $edge, 2;
  $to = $from unless defined $to;

  my $attrs = $kind == 2 ? ' [dir="both"]' : '';
  print G "\"$from\" -> \"$to\"$attrs;\n";

  # Make sure both endpoints get their node statement (once each).
  nodeprint($from);
  nodeprint($to);
}

print G "}\n";
# Buffered write errors only surface at close time, so check it.
close G or die "error closing graph.dot: $!";

##########################################

# getwikiwords($path)
#
# Reads the page file $path and records every wiki link found in it in the
# file-scoped %graph.  The topic name is $path with a trailing ".txt" and a
# leading "files/" removed.
#
# For each token that matches $LinkPattern the key "word#topic" is stored:
#   1 - link seen in one direction only
#   2 - the reverse key ("topic#word") was already present: this edge is
#       marked as two-way ...
#   3 - ... and the reverse key is demoted so the pair is drawn only once.
# A page linking to itself is kept as a plain edge (value 1).
#
# Dies if the file cannot be opened.  Returns the result of close().
sub getwikiwords{
  my ($path) = @_;

  open my $fh, '<', $path or die "error opening $path: $!";

  # Derive the topic name from the path.  The dot is escaped so that only
  # a literal ".txt" suffix is stripped.
  (my $topic = $path) =~ s/\.txt$//;
  $topic =~ s/^files\///;

  while (my $line = <$fh>) {
    # Tokenise at word boundaries and keep only tokens that are links.
    foreach my $word (split /\b/, $line) {
      next unless $word =~ /^$LinkPattern$/;

      my $edge = "$word#$topic";
      next if $graph{$edge};          # already recorded (as 1, 2 or 3)

      # The reverse lookup must interpolate $topic; looking up the literal
      # text "topic#..." would never find the opposite edge.
      my $reverse = "$topic#$word";
      if ($word ne $topic && $graph{$reverse}) {
        $graph{$edge}    = 2;
        $graph{$reverse} = 3;
      }
      else {
        $graph{$edge} = 1;
      }
    }
  }

  close $fh;
}

# nodeprint($name)
#
# Writes the node statement for $name (the quoted name followed by
# $nodeformat) to the handle G, but only the first time that name is seen;
# %printed remembers the names already written.
sub nodeprint{
  my ($node) = @_;

  if (!$printed{$node}) {
    print G "\"$node\" $nodeformat;\n";
    $printed{$node}=1;
  }

  # Same value the sub yielded before: the (now true) "printed" flag.
  return $printed{$node};
}
