Algorithm-VSM

 view release on metacpan or  search on metacpan

examples/calculate_precision_and_recall_for_LSA.pl  view on Meta::CPAN


my $lsa = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  # default: 1
                   case_sensitive      => 0,                # default: 0 
                   corpus_directory    => $corpus_dir,
                   file_types          => ['.txt', '.java'],
                   lsa_svd_threshold   => 0.05,     # Used for rejecting singular
                                                    # values that are smaller than
                                                    # this threshold fraction of
                                                    # the largest singular value.
                   min_word_length     => 4,
                   query_file          => $query_file,
                   relevancy_file      => $relevancy_file,   # Relevancy judgments
                                                             # are deposited in 
                                                             # this file.
                   relevancy_threshold => 5,    # Used when estimating relevancies
                                                # with the method 
                                                # estimate_doc_relevancies().  A
                                                # doc must have at least this 
                                                # number of query words to be
                                                # considered relevant.

examples/calculate_precision_and_recall_for_VSM.pl  view on Meta::CPAN

                                               # must be as shown in test_queries.txt

my $relevancy_file   = "relevancy.txt";        # The generated relevancies will
                                               # be stored in this file.

my $vsm = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  #default: 1
                   case_sensitive      => 0,                # default: 0 
                   corpus_directory    => $corpus_dir,
                   file_types          => ['.txt', '.java'],
                   min_word_length     => 4,
                   query_file          => $query_file,
                   relevancy_file      => $relevancy_file,   # Relevancy judgments
                                                             # are deposited in 
                                                             # this file.
                   relevancy_threshold => 5,    # Used when estimating relevancies
                                                # with the method 
                                                # estimate_doc_relevancies().  A
                                                # doc must have at least this 
                                                # number of query words to be
                                                # considered relevant.

examples/calculate_precision_and_recall_from_file_based_relevancies_for_LSA.pl  view on Meta::CPAN


my $lsa = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  # default: 1
                   case_sensitive      => 0,                # default: 0 
                   corpus_directory    => $corpus_dir,
                   file_types          => ['.txt', '.java'],
                   lsa_svd_threshold   => 0.01,     # Used for rejecting singular
                                                    # values that are smaller than
                                                    # this threshold fraction of
                                                    # the largest singular value.
                   min_word_length     => 4,
                   query_file          => $query_file,
                   relevancy_file      => $relevancy_file,
                   stop_words_file     => $stop_words_file,
                   want_stemming       => 1,                # default: 0
          );

$lsa->get_corpus_vocabulary_and_word_counts();

$lsa->generate_document_vectors();

examples/calculate_precision_and_recall_from_file_based_relevancies_for_VSM.pl  view on Meta::CPAN

                                               # must be as shown in test_queries.txt

my $relevancy_file   = "relevancy.txt";        # The human-supplied relevancies
                                               # will be read from this file.

my $vsm = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  # default: 1
                   case_sensitive      => 0,                # default: 0 
                   corpus_directory    => $corpus_dir,
                   file_types          => ['.txt', '.java'],
                   min_word_length     => 4,
                   query_file          => $query_file,
                   relevancy_file      => $relevancy_file,
                   stop_words_file     => $stop_words_file,
                   want_stemming       => 1,                # default: 0
          );

$vsm->get_corpus_vocabulary_and_word_counts();

$vsm->generate_document_vectors();

examples/calculate_similarity_matrix_for_all_docs.pl  view on Meta::CPAN

use Text::CSV;

my $corpus_dir = "minicorpus";
my $stop_words_file = "stop_words.txt";   

my $vsm = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  # default: 1
                   case_sensitive           => 0,           # default: 0 
                   corpus_directory         => $corpus_dir,
                   file_types               => ['.txt', '.java'],
                   min_word_length          => 4,
                   stop_words_file          => $stop_words_file,
                   want_stemming            => 1,           # default: 0
          );

$vsm->get_corpus_vocabulary_and_word_counts();

$vsm->generate_document_vectors();

#    If you would like to directly measure the similarity between two
#    specific documents, uncomment the following two statements.

examples/calculate_similarity_matrix_for_all_normalized_docs.pl  view on Meta::CPAN

use Text::CSV;

my $corpus_dir = "minicorpus";
my $stop_words_file = "stop_words.txt";   

my $vsm = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  # default: 1
                   case_sensitive           => 0,           # default: 0 
                   corpus_directory         => $corpus_dir,
                   file_types               => ['.txt', '.java'],
                   min_word_length          => 4,
                   stop_words_file          => $stop_words_file,
                   want_stemming            => 1,           # default: 0
          );

$vsm->get_corpus_vocabulary_and_word_counts();
$vsm->generate_document_vectors();

#    If you would like to directly measure the similarity between two
#    specific documents, uncomment the following two statements.
#    Obviously, you will have to change the arguments to suit your needs.

examples/continuously_running_VSM_retrieval_engine.pl  view on Meta::CPAN


my $stop_words_file = "stop_words.txt";    # This file will typically include the
                                           # keywords of the programming 
                                           # language(s) used in the software.
my $vsm = Algorithm::VSM->new( 
                   break_camelcased_and_underscored  => 1,  # default: 1
                   case_sensitive           => 0,           # default: 0
                   corpus_directory         => $corpus_dir,
                   file_types               => ['.txt', '.java'],
                   max_number_retrievals    => 10,
                   min_word_length          => 4,
                   stop_words_file          => $stop_words_file,
                   use_idf_filter           => 1,
                   want_stemming            => 1,           # default: 0
          );

$vsm->get_corpus_vocabulary_and_word_counts();
$vsm->generate_document_vectors();

# Interactive retrieval loop: read one query per line from STDIN, hand the
# words to retrieve_with_vsm(), and display whatever it returns.  The script
# terminates (via die) when the user submits an empty line or when STDIN
# reaches end-of-file.
while (1) {
    print "\nEnter your query in the next line (or just press `Enter' to exit):\n\n";
    my $query_string = <STDIN>;
    # <STDIN> yields undef at end-of-file (Ctrl-D, closed pipe).  Treat that
    # like an empty line rather than running substitutions on undef.
    $query_string = '' unless defined $query_string;
    $query_string =~ s/\r?\n?$//;
    $query_string =~ s/(^\s*)|(\s*$)//g;
    # No system call has failed at this point, so $! would only add a stale,
    # misleading errno string to the message.
    die "... exiting\n" if length($query_string) == 0;
    # Keep every non-empty token; a plain truth test would discard the
    # query word "0".
    my @query = grep { length } split /\s+/, $query_string;
    # The eval traps any exception raised during retrieval so that one bad
    # query prints its error and the loop keeps running.
    my $retrievals = eval {
        $vsm->retrieve_with_vsm( \@query );
    };
    if ($@) {
        print "$@\n";
    } else {
        $vsm->display_retrievals( $retrievals );
    }
}

examples/corpus/AbstractShapeIncremental.java  view on Meta::CPAN


/** Driver that builds three shapes and prints the sum of their areas. */
class Test {
    public static void main( String[] args )
    {
        // Array initializer instead of element-by-element assignment.
        Shape[] shapes = {
            new Circle( 2.0 ),
            new Rectangle( 1.0, 3.0 ),
            new Rectangle( 4.0, 2.0 )
        };

        double areaSum = 0;
        for ( Shape shape : shapes )
            areaSum += shape.area();
        System.out.println("Total area = " + areaSum);
    }
}

examples/corpus/AddArray.java  view on Meta::CPAN


    public static void main( String[] args )                      //(B)
    {
        int[] data = { 0, 1, 2, 3, 4, 5, 9, 8, 7, 6 };            //(C)
        System.out.println( "The sum is: "                        //(D)
                                 + addArray(data) );  
    }

    /**
     * Returns the sum of all elements of {@code a}; the result is 0 for an
     * empty array.
     */
    public static int addArray( int[] a ) {                       //(E)
        int total = 0;
        for ( int element : a )
            total += element;
        return total;
    }
}          

examples/corpus/ArrayBasic.java  view on Meta::CPAN

    int age;
    public User( String nam, int yy ) {
        name = nam;
        age = yy;
    }
}

class Test {
    public static void main( String[] args ) {
        User[] user_list = new User[ 4 ];
        for ( int i=0; i<user_list.length; i++ )
            System.out.print( user_list[ i ] + "  " ); 
                                       // null null null null
    }
}

examples/corpus/ArraysFill.java  view on Meta::CPAN


//ArraysFill.java

import java.util.*;

// Demonstrates java.util.Arrays.fill (whole-array and index-range forms)
// followed by Arrays.binarySearch on a small double array.
class Test {
    public static void main( String[] args ) {

        int[] intArr = new int[4];
        Arrays.fill( intArr, 99 );                                //(A)
        for ( int i=0; i<intArr.length; i++ )
            System.out.print( intArr[ i ] + " " );  // 99 99 99 99
        System.out.println();

        double[] dbArr = new double[4];
        // Range form: the from-index (2) is inclusive and the to-index (3)
        // is exclusive, so only element 2 is overwritten.
        Arrays.fill( dbArr, 2, 3, 9.9 );                          //(B)
        for ( int i=0; i<dbArr.length; i++ )
            System.out.print( dbArr[ i ] + " " );   // 0.0 0.0 9.9 0.0
        System.out.println();

        // NOTE(review): Arrays.binarySearch is specified only for sorted
        // input, and dbArr ( 0.0 0.0 9.9 0.0 ) is not sorted here, so the
        // index printed below is not guaranteed by the API contract.
        int pos = Arrays.binarySearch( dbArr, 9.9 );              //(C)
        System.out.println( pos );                  // 2
    }
}

examples/corpus/ArraysShuffle.java  view on Meta::CPAN


//ArraysShuffle.java

import java.util.*;

/**
 * Shuffles the integers 0..9 through a List view of an array, copies the
 * shuffled order back into a fresh Integer[] and prints it on one line.
 */
class Test {
    public static void main( String[] args ) {

        Integer[] intArr2 = new Integer[10];                      //(A)

        for ( int i=0; i<intArr2.length; i++ )                    //(B)
            intArr2[i] = new Integer(i);

        // Arrays.asList returns a list backed by intArr2, so the shuffle
        // below also reorders intArr2 itself.
        List list = Arrays.asList( intArr2 );                     //(C)

        Collections.shuffle( list );                              //(D)

        // The no-argument toArray() is only specified to return Object[];
        // casting that result to Integer[] fails with ClassCastException on
        // current JDKs.  Passing a typed array makes the runtime type of
        // the result Integer[], so the cast is always valid.
        Integer[] intArr3 = 
                   (Integer[]) list.toArray( new Integer[0] );    //(E)

        for ( int i=0; i<intArr3.length; i++ )                    //(F)
            System.out.print( intArr3[ i ].intValue() + " " );
                    // 9 8 5 1 3 4 7 2 6 0  (different with each run)
        System.out.println();      
    }
}

examples/corpus/CloneArray.java  view on Meta::CPAN

    public Object clone() throws CloneNotSupportedException {     //(B)
        X xob = null;
        xob = (X) super.clone();
        //now clone the array separately:
        xob.arr = (int[]) arr.clone();                            //(C)
        return xob;
    }

    public String toString() {
        String printstring = "";
        for (int i=0; i<arr.length; i++) printstring += " " + arr[i];
        return printstring;
    }

    public static void main( String[] args ) throws Exception {
        X xobj = new X();
        X xobj_clone = (X) xobj.clone();                          //(D)       

        System.out.println( xobj );           // 0 4 5 2 5
        System.out.println( xobj_clone );     // 0 4 5 2 5

examples/corpus/CloneClassTypeArr.java  view on Meta::CPAN

    }        
    public String toString() { return x + ""; }
}

/**
 * Wrapper around an array of Y objects.  clone() gives the copy its own
 * array and fills it with clones of the individual elements.
 */
class Z implements Cloneable {
    public Y[] yarr;
    public Z( Y[] arr ) { this.yarr = arr; }

    /** Returns a copy whose yarr holds a clone of every element of this yarr. */
    public Object clone() throws CloneNotSupportedException {     //(A)
        Z zclone = (Z) super.clone();
        // Cloning only the array object would leave both arrays pointing
        // at the same elements:
        // zclone.yarr = ( Y[] ) yarr.clone();      // WRONG      //(B)
        final int count = yarr.length;
        Y[] copies = new Y[ count ];                              //(C)
        for ( int k = 0; k < count; k++ )
            copies[k] = (Y) yarr[k].clone();                      //(D)
        zclone.yarr = copies;                                     //(E)
        return zclone;
    }

    /** Concatenates the elements' string forms, each followed by two spaces. */
    public String toString() {
        StringBuilder joined = new StringBuilder();
        for ( Y element : yarr )
            joined.append( element ).append( "  " );
        return joined.toString();
    }
}


class Test {
    public static void main( String[] args ) throws Exception
    {

examples/corpus/CrazyWindow.java  view on Meta::CPAN

        contentPane.setLayout(new GridLayout(1, 2));
        panel1 = new MyTextPanel();                               //(C)
        panel2 = new MyDrawPanel();                               //(D)
        contentPane.add( panel1 );  
        contentPane.add( panel2 );
        setContentPane( contentPane );
    }

    class MyTextPanel extends JPanel  {
        class MyDocumentListener implements DocumentListener {    //(E)
            int lengthText;
            StringBuffer word = new StringBuffer("");             //(F)
            public void insertUpdate( DocumentEvent e ) {         //(G)
                Document doc = (Document) e.getDocument();
                try {
                    lengthText = doc.getLength();
                    String currentChar = 
                                 doc.getText( lengthText - 1, 1 );
                    char ch = 
                       currentChar.charAt( currentChar.length() - 1 );
                    if ( currentChar.equals( " " ) || ch == '\n' ) {
                        if ( word.toString().equals( "red" ) ) {  //(H)
                            panel2.drawColoredSquare( "red" );    //(I)
                        }
                        if ( word.toString().equals( "green" ) ) {
                            panel2.drawColoredSquare( "green" );
                        }
                        if ( word.toString().equals( "blue" ) ) {
                            panel2.drawColoredSquare( "blue" );
                        }

examples/corpus/CrazyWindow.java  view on Meta::CPAN

                    }
                    else                                          //(J)
                        word = word.append( currentChar );
                } catch( BadLocationException bad ) { 
                    bad.printStackTrace(); 
                }
            }
            /**
             * Keeps the word being typed in step with a deletion: the word
             * is reset when the document is empty or now ends in a space or
             * newline; otherwise its last character is dropped.
             */
            public void removeUpdate( DocumentEvent e )  {        //(K)
                try {
                  Document doc = (Document) e.getDocument();
                  lengthText = doc.getLength();

                  // Deleting the only remaining character leaves an empty
                  // document; asking for the character at offset -1 would
                  // throw BadLocationException and leave a stale word.
                  if ( lengthText == 0 ) {
                      word = new StringBuffer();
                      return;
                  }

                  String currentChar = 
                                  doc.getText( lengthText - 1, 1 );
                  char ch = 
                    currentChar.charAt( currentChar.length() - 1 );
                  if ( currentChar.equals( " " ) || ch == '\n'  ) {
                      word = new StringBuffer();                  //(L)
                  }
                  else if ( word.length() >= 1 )
                      word = 
                        word.deleteCharAt( word.length() - 1 );   //(M)
                } catch( BadLocationException bad ) { 
                    bad.printStackTrace(); 
                }
            }
            public void changedUpdate( DocumentEvent e ) {}       //(N) 
        }

        public MyTextPanel() {
             JTextArea ta = new JTextArea( 100, 60);
             ta.getDocument().addDocumentListener( 

examples/corpus/EventThreadDemo.java  view on Meta::CPAN

                       areaScrollPane, BorderLayout.CENTER );
        frame.pack();
        frame.setVisible( true );
        keepBusy( 500, "main" );                                  //(A)
    }

    /**
     * On every insertion, reads the last character of the document, calls
     * keepBusy, and then prints that character to standard output.
     */
    static class MyDocumentListener implements DocumentListener {
        public void insertUpdate( final DocumentEvent e ) {
            Document document = e.getDocument();
            String lastChar = null;
            try {
                // One character, taken from the very end of the document.
                lastChar = document.getText( document.getLength() - 1, 1 );
            } catch( BadLocationException ex ) {
                ex.printStackTrace();
            }
            keepBusy( 500, "MyDocumentListener" );                //(B)
            System.out.print( lastChar );   // prints "null" if the lookup failed
        }

        // Removals and attribute changes are ignored.
        public void removeUpdate(DocumentEvent e) { }
        public void changedUpdate(DocumentEvent e) { }
    }

examples/corpus/EventThreadDemo2.java  view on Meta::CPAN

        while ( System.currentTimeMillis() < curr + howLong )
            ;
    }
}

//////////////////////  class MyDocumentListener  /////////////////////
/**
 * Document listener whose insertUpdate reads the last character of the
 * document, calls MyTools.printThreadInfo and MyTools.keepBusy, and then
 * prints that character.  Removals and attribute changes are ignored.
 */
class MyDocumentListener implements DocumentListener {
        public void insertUpdate( final DocumentEvent e ) {
            String str = null;
            Document doc = e.getDocument();
            int lengthText = doc.getLength();
            try {
                // One character, taken from the very end of the document.
                str = doc.getText( lengthText - 1, 1 );
            } catch( BadLocationException badloc ) { 
                        badloc.printStackTrace(); 
            }
            // Label spelling corrected (was "iniside").
            MyTools.printThreadInfo("From inside the listener:"); //(O)    
            MyTools.keepBusy( 500 );
            System.out.print( str );   // prints "null" if the lookup failed
        }
        public void removeUpdate(DocumentEvent e) { }
        public void changedUpdate(DocumentEvent e) { }
}

examples/corpus/ExtendedInterface.java  view on Meta::CPAN

        shapes[0] = dc;
        shapes[1] = dr1;
        shapes[2] = dr2;

        drawScalables[0] = dc;
        drawScalables[1] = dr1;
        drawScalables[2] = dr2;

        int total_area = 0;
        DrawWindow dw = new DrawWindow();
        for (int i = 0; i < shapes.length; i++ ) {
            total_area += shapes[i].area();
            drawScalables[i].setPosition( i*10.0, i*10.0 );  
            drawScalables[i].drawScaledShape( 2, dw );       
        }
        System.out.println("Total area = " + total_area);         //(E)
    }
}

examples/corpus/FileCopy.java  view on Meta::CPAN

import java.io.*;                                                 //(A)

class FileCopy {                                                  //(B)

    public static void main( String[] args )                      //(C)
    {
        int ch = 0;                              
        FileInputStream in = null;                                //(D)
        FileOutputStream out = null;                              //(E)
  
        if ( args.length != 2 ) {                                 //(F)
            System.err.println( "usage: java FileCopy source dest" );
            System.exit( 0 );
        }
        try {                                    
            in = new FileInputStream( args[0] );                  //(G)
            out = new FileOutputStream( args[1] );                //(H)
     
            while ( true ) {                       
                ch = in.read();                                   //(I)
                if (ch == -1) break;                   

examples/corpus/FontFamilies.java  view on Meta::CPAN


class FontFamilies extends JPanel {

    public void paintComponent( Graphics g ) {
        super.paintComponent( g );
        g.translate( getInsets().left, getInsets().top );
        GraphicsEnvironment ge = 
            GraphicsEnvironment.getLocalGraphicsEnvironment();
        String[] fontList = ge.getAvailableFontFamilyNames();   
        Font defaultFont = g.getFont();
        for (int i = 0; i < fontList.length; i++ ) {
            g.setFont( defaultFont );
            g.drawString( fontList[ i ], 10, i * 14 );
            Font f = new Font( fontList[ i ], Font.PLAIN, 12 );
            g.setFont( f );
            g.drawString( "Purdue", 200 , i * 14 );
        }
    }

    public static void main( String[] args ) {
        JFrame f = new JFrame();

examples/corpus/Interleaved.java  view on Meta::CPAN

        String str = "";
        if ( dogs != null ) {
            str += "\nDOGS: ";
            for ( int i=0; i<dogs.size(); i++ ) {
                str += (Dog) dogs.elementAt(i);
            }       
            str += "\n";
        }
        if ( autos != null ) {
            str += "\nAUTOS: ";
            for ( int i=0; i<autos.length - 1; i++ ) {
                str += " " + autos[i] + ",";
            }       
            str += " " + autos[autos.length - 1];  
            str += "\n";
        }
        if ( friends != null ) {
            str += "\nFRIENDS:";
            for ( int i=0; i<friends.length; i++ ) {
                str += "\n";
                str += friends[i].getFirstName();
                str += " " + friends[i].getLastName();
            }       
            str += "\n";
        }
        if ( kitty != null ) {
            str += "\nCAT:";
            str += kitty;
        }

examples/corpus/Manager.java  view on Meta::CPAN

        String str = "";
        if ( dogs != null ) {
            str += "\nDOGS: ";
            ListIterator iter = dogs.listIterator();
            while ( iter.hasNext() )
                str += (Dog) iter.next();
            str += "\n";
        }
        if ( autos != null ) {
            str += "\nAUTOS: ";
            for ( int i=0; i<autos.length - 1; i++ ) {
                str += " " + autos[i] + ",";
            }       
            str += " " + autos[autos.length - 1];
            str += "\n";
        }
        if ( friends != null ) {
            str += "\nFRIENDS:";
            for ( int i=0; i<friends.length; i++ ) {
                str += "\n";
                str += friends[i].getFirstName();
                str += " " + friends[i].getLastName();
            }       
            str += "\n";
        }
        if ( kitty != null ) {
            str += "\nCAT:";
            str += kitty;
        }

examples/corpus/MultiPolymorphism.java  view on Meta::CPAN

        shapes[0] = dc;
        shapes[1] = dr1;
        shapes[2] = dr2;

        drawables[0] = dc;
        drawables[1] = dr1;
        drawables[2] = dr2;

        int total_area = 0;
        DrawWindow dw = new DrawWindow();
        for (int i = 0; i < shapes.length; i++ ) {
            total_area += shapes[i].area();
            drawables[i].setPosition( i*10.0, i*10.0 );           //(C)
            drawables[i].draw( dw );                              //(D)
        }
        System.out.println("Total area = " + total_area);         //(E)
    }
}

examples/corpus/RenderGraphics.java  view on Meta::CPAN

                width = getWidth();
                height = getHeight();
                int stringY = height - 10;
                rectWidth = width - 20;
                rectHeight = stringY - maxCharHeight - 10;
                int x2Points[] = 
                      {10, 10+rectWidth, 10, 10+rectWidth};
                int y2Points[] = 
                      {10, 10+rectHeight, 10+rectHeight, 10};
                g.drawPolyline(x2Points, 
                      y2Points, x2Points.length);                 //(F)
                g.drawString("drawPolyline", 10, stringY);        //(G)
            }
            });

        //rounded rectangle:
        contentPane.add( new JPanel() {
            public void paintComponent(Graphics g) {
                super.paintComponent(g);  
                width = getWidth();
                height = getHeight();

examples/corpus/StringFind.java  view on Meta::CPAN

/**
 * Replaces every occurrence of a search string inside a StringBuffer with
 * a replacement string and prints the resulting text.
 */
class StringFind {

    public static void main( String[] args ) {

       StringBuffer strbuf = new StringBuffer( 
                    "one hello is like any other hello" );
       String searchString = "hello";
       String replacementString = "armadillo";

       int pos = 0;
       // Search the buffer directly; there is no need to copy its contents
       // into a new String on every pass.
       while ( ( pos = strbuf.indexOf( searchString, pos ) ) != -1 ) {
           strbuf.replace( pos, pos + searchString.length(), 
                           replacementString );
           // Resume after the inserted text, so a replacement that itself
           // contains the search string is never matched again.
           pos += replacementString.length();
       }

       System.out.println( strbuf );
    }
}

examples/corpus/StringSort.java  view on Meta::CPAN

import java.util.*;



/**
 * Prints a fixed list of fruit names twice: first in natural
 * (case-sensitive) String order, then in case-insensitive order.
 */
class StringSort {

    public static void main( String[] args ) {

        String[] strArr = { "apples", "bananas", "Apricots", "Berries", 
                            "oranges", "Oranges", "APPLES", "peaches" };

        // A second name for the SAME array (no copy is made), so the
        // case-insensitive sort below reorders the array that was already
        // sorted case-sensitively.
        String[] strArr2 = strArr;

        System.out.println("Case sensitive sort :" );
        Arrays.sort( strArr );
        for ( String name : strArr )
            System.out.println( name );

        System.out.println("\nCase insensitive sort:" );
        Arrays.sort( strArr2, String.CASE_INSENSITIVE_ORDER );
        for ( String name : strArr2 )
            System.out.println( name );
    }
}

examples/corpus/WindowWithMenu.java  view on Meta::CPAN

        }
        if ( arg.equals( "Save" ) ) {         
            saveDialog.setDirectory(".");
            saveDialog.show();
            filename = saveDialog.getFile();
            String superString = ta.getText();                    //(L)
            if (filename != null) {
                try {
                    FileOutputStream fout = 
                            new FileOutputStream( filename );
                    for (int i=0; i<superString.length(); i++)
                        fout.write( superString.charAt(i) );      //(M)
                    fout.close();
                } catch( IOException e ) { 
                    System.out.println( "IO error" ); 
                }
            }
        }
    }

    public static void main(String[] args){

examples/corpus_with_java_and_cpp/AbstractShapeIncremental.java  view on Meta::CPAN


/** Driver that builds three shapes and prints the sum of their areas. */
class Test {
    public static void main( String[] args )
    {
        // Array initializer instead of element-by-element assignment.
        Shape[] shapes = {
            new Circle( 2.0 ),
            new Rectangle( 1.0, 3.0 ),
            new Rectangle( 4.0, 2.0 )
        };

        double areaSum = 0;
        for ( Shape shape : shapes )
            areaSum += shape.area();
        System.out.println("Total area = " + areaSum);
    }
}

examples/corpus_with_java_and_cpp/AddArray.java  view on Meta::CPAN


    public static void main( String[] args )                      //(B)
    {
        int[] data = { 0, 1, 2, 3, 4, 5, 9, 8, 7, 6 };            //(C)
        System.out.println( "The sum is: "                        //(D)
                                 + addArray(data) );  
    }

    /**
     * Returns the sum of all elements of {@code a}; the result is 0 for an
     * empty array.
     */
    public static int addArray( int[] a ) {                       //(E)
        int total = 0;
        for ( int element : a )
            total += element;
        return total;
    }
}          

examples/corpus_with_java_and_cpp/ArrayBasic.java  view on Meta::CPAN

    int age;
    public User( String nam, int yy ) {
        name = nam;
        age = yy;
    }
}

class Test {
    public static void main( String[] args ) {
        User[] user_list = new User[ 4 ];
        for ( int i=0; i<user_list.length; i++ )
            System.out.print( user_list[ i ] + "  " ); 
                                       // null null null null
    }
}



( run in 0.728 second using v1.01-cache-2.11-cpan-65fba6d93b7 )