Algorithm-TicketClusterer

 view release on metacpan or  search on metacpan

lib/Algorithm/TicketClusterer.pm  view on Meta::CPAN

package Algorithm::TicketClusterer;

#---------------------------------------------------------------------------
# Copyright (c) 2014 Avinash Kak. All rights reserved.  This program is
# free software.  You may modify and/or distribute it under the same terms
# as Perl itself.  This copyright notice must remain attached to the file.
#
# Algorithm::TicketClusterer is a Perl module for retrieving Excel-stored
# past tickets that are most similar to a new ticket.  Tickets are commonly
# used in the software services industry and customer support businesses to
# record requests for service, product complaints, user feedback, and so
# on.
# ---------------------------------------------------------------------------

use 5.10.0;
use strict;
use warnings;
use Carp;
use Storable;
use Spreadsheet::ParseExcel;
use Spreadsheet::XLSX;
use WordNet::QueryData;
use Text::Iconv;
use SDBM_File;
use Fcntl;

our $VERSION = '1.01';

############################### The Constructor #############################

sub new { 
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument " .
          "--- perhaps a misspelling\n" 
          if _check_for_illegal_params(@params) == 0;
    bless {
        _excel_filename         =>   $args{excel_filename}, 
        _which_worksheet        =>   $args{which_worksheet},
        _raw_tickets_db         =>   $args{raw_tickets_db}, 
        _processed_tickets_db   =>   $args{processed_tickets_db}, 
        _synset_cache_db        =>   $args{synset_cache_db}, 
        _stemmed_tickets_db     =>   $args{stemmed_tickets_db}, 
        _inverted_index_db      =>   $args{inverted_index_db},
        _tickets_vocab_db       =>   $args{tickets_vocab_db},
        _idf_db                 =>   $args{idf_db}, 
        _tkt_doc_vecs_db        =>   $args{tkt_doc_vecs_db},
        _tkt_doc_vecs_normed_db =>   $args{tkt_doc_vecs_normed_db},
        _clustering_fieldname   =>   $args{clustering_fieldname}, 
        _unique_id_fieldname    =>   $args{unique_id_fieldname}, 
        _stop_words_file        =>   $args{stop_words_file},
        _misspelled_words_file  =>   $args{misspelled_words_file},
        _min_word_length        =>   $args{min_word_length} || 4,
        _add_synsets_to_tickets =>   $args{add_synsets_to_tickets} || 0,
        _want_stemming          =>   $args{want_stemming} || 0,
        _how_many_retrievals    =>   $args{how_many_retrievals} || 5,
        _min_idf_threshold      =>   $args{min_idf_threshold},
        _max_num_syn_words      =>   $args{max_num_syn_words} || 3,
        _want_synset_caching    =>   $args{want_synset_caching} || 0,
        _stop_words             =>   {},
        _all_tickets            =>   [],
        _column_headers         =>   [],
        _good_columns           =>   [],
        _tickets_by_ids         =>   {},
        _processed_tkts_by_ids  =>   {},
        _stemmed_tkts_by_ids    =>   {},



( run in 1.273 second using v1.01-cache-2.11-cpan-e1769b4cff6 )