#!/usr/bin/perl

# script was written ages ago, sometime around 2008.

use warnings;
use strict;

use Irssi qw{
	signal_add_last signal_add
	command_bind
	settings_get_int settings_add_int
	settings_get_str settings_add_str
};

our $VERSION = "0.1";
our %IRSSI = (
	authors     => 'Urchlay',
	contact     => 'Urchlay on NewNet',
	name        => 'complete_text',
	description => 'Create a dictionary from channel/msg text, ' .
	               'use for word completion',
	license     => 'Same as Perl',
	url         => 'none',
);

# TODO:
# - allow user to save the dictionary and reload
# - allow user to remove words from dictionary
# - better case-matching: store words in their original case or lowercase
#   (user setting). If $word is initial cap, complete to full original
#   case (so in #atari, A<tab> would be AtariSIO, not Atarisio)
# - add channel topic and maybe /quit msg
# - maybe expire old words based on use frequency?
# - typo support like irccomplete?

# User-visible settings, both in the 'complete_text' group:
#   complete_dict_size - cap on the number of words learned from chat
#   complete_preload   - colon-separated list of word files to preload
settings_add_int('complete_text', 'complete_dict_size', 5000);
settings_add_str('complete_text', 'complete_preload', "/usr/dict/words");

# Current dictionary size cap; refreshed by setup_changed().
our $limit = settings_get_int('complete_dict_size');

# Dynamic dictionary: @dict holds learned words oldest-first, %seen is
# keyed by the same words so duplicates can be rejected cheaply.
our (%seen, @dict);

# Static dictionary: %static_seen is keyed by every line read from the
# preload files, @static_dict is those keys sorted by length, then
# alphabetically.
our (%static_seen, @static_dict);

# Handler for the 'complete word' signal: appends every dictionary word
# that starts with the word being completed to the candidate list.
#
# Arguments:
#   $complist   - array ref of completion candidates; matches are pushed
#                 onto it (dynamic dictionary first, then static)
#   $window     - unused
#   $word       - the text being completed
#   $linestart  - unused
#   $want_space - unused
#
# Matching is case-insensitive. If the word being completed starts with
# an ASCII capital, each candidate added here has its first letter
# capitalised. Candidates added by other handlers are left untouched.
sub complete_word {
	my ($complist, $window, $word, $linestart, $want_space) = @_;

	return unless defined $word;

	# Peel off everything up to the last word boundary that is followed
	# by a word character (e.g. "(" in "(foo", "foo-" in "foo-bar"), so
	# only the trailing word is looked up. The peeled text is put back
	# in front of each candidate below.
	my $prefix = '';
	if($word =~ s/^(.*\b)(\w)/$2/) {
		$prefix = $1;
	}

	# Nothing to look up; an empty pattern would match every word in
	# both dictionaries.
	return unless length $word;

	my $initial_cap = ($word =~ /^[A-Z]/);

	# The typed text is data, not a pattern.
	my $escaped     = quotemeta $word;
	my $starts_with = qr/^$escaped/i;

	# Build new strings rather than assigning to $_ here: grep returns
	# aliases into @dict/@static_dict, so writing through them would
	# rewrite the dictionaries themselves.
	for my $entry (grep { $_ =~ $starts_with } @dict, @static_dict) {
		my $completion = $initial_cap ? ucfirst $entry : $entry;
		push @$complist, $prefix . $completion;
	}
}

# Handler for the "message ..." signals: harvests words from the message
# text into the dynamic dictionary.
#
# Only $_[1] is used; it is taken to be the message text for every signal
# this handler is bound to (NOTE(review): confirm against the Irssi
# signal documentation).
#
# Words are lowercased runs of four or more \w characters. A word already
# present in the static or dynamic dictionary is skipped. When the
# dictionary grows past $limit entries, the oldest words are dropped.
sub add_to_dict {
	my $text = $_[1];
	return unless defined $text;

	for my $word (lc($text) =~ /(\w{4,})/g) {
		# \w includes "_", so _emphasised_ words arrive with their
		# underscores attached; strip one surrounding pair.
		# (*emphasis* needs no handling: "*" is never part of a \w run.)
		$word =~ s/^_(\w+)_$/$1/;

		next if $static_seen{$word};
		next if $seen{$word}++;

		push @dict, $word;

		# Keep at most $limit words. The emptiness check stops the loop
		# if $limit is ever negative.
		while(@dict && @dict > $limit) {
			my $old = shift @dict;
			delete $seen{$old};
		}
	}
}

# Handler for the 'dumpdict' command: prints every word in the dynamic
# dictionary followed by the sizes of %seen and @dict, then the size of
# the static dictionary. Arguments are ignored.
sub dumpdict {
	my $seen_count   = keys %seen;
	my $dict_count   = @dict;
	my $static_count = keys %static_seen;

	print "dynamic dictionary";
	foreach my $entry (@dict) {
		print $entry;
	}
	print $seen_count;
	print $dict_count;

	# The static word list itself is not printed, only its size.
	print "\nstatic dictionary";
	print $static_count;
}

# Rebuilds the static dictionary from the files named in the
# 'complete_preload' setting (a colon-separated list of paths).
#
# Every non-empty line of every readable file becomes a key of
# %static_seen; @static_dict is then rebuilt as those keys sorted by
# length, then alphabetically. Files that are missing or cannot be opened
# are reported and skipped. Takes no arguments.
sub load_static_seen {
	%static_seen = ();
	@static_dict = ();

	my @files = split /:/, settings_get_str('complete_preload');

	for my $file (@files) {
		print "preloading words from $file";

		# Expand a leading "~" to the home directory. "~user" forms are
		# left alone rather than being rewritten incorrectly.
		$file =~ s{^~(?=/|$)}{$ENV{HOME}} if defined $ENV{HOME};

		unless(-f $file) {
			print "$file not found, skipping";
			next;
		}

		# Three-argument open: the filename is never parsed for a mode.
		my $fh;
		unless(open $fh, '<', $file) {
			print "$file: $!";
			next;
		}

		while(my $line = <$fh>) {
			chomp $line;
			next if $line eq '';
			$static_seen{$line}++;
		}

		close $fh;
	}

	# Sort once, after all files are in, instead of once per file.
	@static_dict = sort {
		(length $a <=> length $b) || ($a cmp $b)
	} keys %static_seen;

	print "preloaded " . (scalar keys %static_seen) . " words";
}

# Handler for the 'setup changed' signal: re-reads 'complete_dict_size',
# trims the dynamic dictionary down to the new size if it is now too big
# (oldest words go first), and reloads the static dictionary.
sub setup_changed {
	my $new_limit = settings_get_int('complete_dict_size');

	# Number of words over the new limit. Computed once up front: @dict
	# shrinks as words are removed, so re-evaluating it per iteration
	# would stop early and leave the dictionary over the limit.
	my $excess = @dict - $new_limit;

	# A negative limit must not ask for more words than exist.
	$excess = @dict if $excess > @dict;

	if($excess > 0) {
		#print "deleting $excess elements";
		# splice removes the oldest words from @dict and returns them;
		# the hash-slice delete forgets them in %seen in the same step.
		delete @seen{ splice @dict, 0, $excess };
	}

	$limit = $new_limit;

	load_static_seen();
}

# Initial load of the preload word lists, then hook up the handlers.
load_static_seen();

signal_add_last('setup changed', \&setup_changed);
signal_add_last('complete word', \&complete_word);
command_bind('dumpdict', \&dumpdict);

# Signals whose message text is fed into the dynamic dictionary.
my @harvest_signals = (
	"message public",         "message private",
	"message own_public",     "message own_private",
	"message dcc",            "message dcc own",
	"message dcc own_action", "message dcc action",
);

foreach my $signal (@harvest_signals) {
	signal_add_last($signal, \&add_to_dict);
}