#!/usr/bin/perl -w
#
# Character tuple (n-gram) counter.
#
# Usage:  SCRIPT [TUPLE_SIZE] < text
#
# Reads text from STDIN, reduces it to lower-case letters (a-z plus the
# German umlauts and sharp s) with "_" standing in for word gaps, and counts
# how often each single character follows each (TUPLE_SIZE - 1)-character
# prefix.  TUPLE_SIZE defaults to 1, in which case the prefix is the empty
# string and the result is a plain character frequency table.
#
# Output on STDOUT:
#   "<tuple size> <total tuples> "        header line
#   "<prefix> ALL <count>"                once per prefix
#   "<prefix> <successor> <count>"        once per successor of that prefix
# Progress messages go to STDERR.

use strict;
use warnings;

# Lower-case the text and reduce it to the counting alphabet.
# Takes the raw text, returns the normalized copy; the argument is untouched.
sub normalize_text {
    my ($text) = @_;

    # Force character semantics so that lc() also folds non-ASCII capitals
    # (A/O/U-umlaut) even when the string was not produced by a decoding layer.
    utf8::upgrade($text);
    $text = lc $text;

    # Punctuation separates words, so it becomes a space rather than vanishing.
    # \x{a7} is the section sign.  The dollar sign is escaped so that Perl
    # cannot read "$%" as the format page-number variable and interpolate it.
    $text =~ s/[.:,;#+*'!"\x{a7}\$%&\/\\()]/ /g;

    # Drop everything outside a-z, a/o/u-umlaut (e4, f6, fc), sharp s (df)
    # and space.  This also removes the line terminators.
    $text =~ s/[^a-z\x{e4}\x{f6}\x{fc}\x{df} ]//g;

    # Collapse runs of spaces, then make the word gap a visible character.
    $text =~ s/ {2,}/ /g;
    $text =~ s/ /_/g;

    return $text;
}

# Count successor frequencies for every window of $tupel characters in $text.
# Returns a two-element list: a hash reference of the form
#   { prefix => { ALL => n, successor_char => n, ... }, ... }
# and the total number of windows counted.
sub count_tuples {
    my ($text, $tupel) = @_;

    my %count_for;
    my $all = 0;

    # Last start index at which a full window still fits; negative (and thus
    # an empty range below) when the text is shorter than one window.
    my $last_start = length($text) - $tupel;

    for my $start (0 .. $last_start) {
        my $base      = substr $text, $start, $tupel - 1;
        my $successor = substr $text, $start + $tupel - 1, 1;

        # A successor is always exactly one character, so it can never
        # collide with the three-character "ALL" bookkeeping key.
        $count_for{$base}{ALL}++;
        $count_for{$base}{$successor}++;
        $all++;
    }

    return (\%count_for, $all);
}

# Render the report as a list of newline-terminated lines, prefixes and
# successors each in sorted order.
sub format_results {
    my ($tupel, $all, $count_for) = @_;

    my @lines = ("$tupel $all \n");

    for my $base (sort keys %{$count_for}) {
        my $successors = $count_for->{$base};
        push @lines, "$base ALL $successors->{ALL}\n";

        for my $successor (sort grep { $_ ne 'ALL' } keys %{$successors}) {
            push @lines, "$base $successor $successors->{$successor}\n";
        }
    }

    return @lines;
}

# Script entry point: parse the tuple size, read STDIN, print the report.
sub main {
    # An absent or empty first argument selects the default size of 1.
    my $tupel = (defined $ARGV[0] && $ARGV[0] ne "") ? $ARGV[0] : 1;

    # Sizes below 1 would yield negative substr lengths while counting.
    die "Usage: $0 [TUPLE_SIZE] < text  (TUPLE_SIZE must be a positive integer)\n"
        unless $tupel =~ /\A[0-9]+\z/ && $tupel > 0;

    # Work on characters, not bytes, so a multi-byte letter is never split
    # across two tuples.  NOTE(review): assumes UTF-8 input -- confirm.
    binmode(STDIN,  ':encoding(UTF-8)') or die "Cannot set STDIN encoding: $!\n";
    binmode(STDOUT, ':encoding(UTF-8)') or die "Cannot set STDOUT encoding: $!\n";

    warn "Getting text...\n";
    my $test = '';
    while (my $line = <STDIN>) {
        # The appended space keeps the last word of one line apart from the
        # first word of the next once the newline has been filtered out.
        $test .= $line . " ";
    }
    warn "Text stored.\n";

    warn "Filtering real text.\n";
    $test = normalize_text($test);
    warn "Filter complete.\n";

    warn "Counting tuples.\n";
    my ($count_for, $all) = count_tuples($test, $tupel);
    warn "Counting complete.\n";

    my @report = format_results($tupel, $all, $count_for);
    warn "Sorting complete.\n";

    ### printing results
    print @report;

    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;