001    // Copyright 2012, 2013 Brad Block, Pawjaw, LLC. (an Ohio Limited Liability Company)
002    // 
003    // This file is part of JFPPR. 
004    // 
005    // JFPPR is free software: you can redistribute it and/or modify
006    // it under the terms of the GNU General Public License as published by
007    // the Free Software Foundation, either version 3 of the License, or
008    // (at your option) any later version.
009    // 
010    // JFPPR is distributed in the hope that it will be useful,
011    // but WITHOUT ANY WARRANTY; without even the implied warranty of
012    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013    // GNU General Public License for more details.
014    // 
015    // You should have received a copy of the GNU General Public License
016    // along with JFPPR.  If not, see <http://www.gnu.org/licenses/>.
017    
018    package com.pawjaw.graph.fppr.demo;
019    
020    import com.pawjaw.graph.fppr.Graph;
021    import com.pawjaw.graph.fppr.Vertex;
022    import java.util.LinkedList;
023    import java.util.List;
024    
025    public class SimilarSpelledWords {
026        public static void main(String[] args) {
027            int i, I, j;
028            float weight;
029            Vertex a, b;
030            Graph word_graph;
031            List<Integer> vertex_ids = new LinkedList();
032            String[] words = new String[] {
033                "haviam",
034                "possui",
035                "provaram",
036                "tem",
037                "têm",
038                "tendo",
039                "tenha",
040                "tenham",
041                "ter",
042                "terem",
043                "teve",
044                "tido",
045                "tinha",
046                "tinham",
047                "tive",
048                "tiveram",
049                "tivesse",
050                "era",
051                "está",
052                "estão",
053                "estará",
054                "estarão",
055                "estava",
056                "estejam",
057                "estiverem",
058                "fica",
059                "ficam",
060                "ficará",
061                "ficarão",
062                "ficava",
063                "ficou",
064                "foi",
065                "for",
066                "são",
067                "seja",
068                "sejam",
069                "será",
070                "serão",
071                "situa",
072                "terá",
073                "terão",
074                "vai",
075                "vão"
076            };
077            word_graph = new Graph(I = words.length);
078            for(i = 0;i < I;i++) {
079                a = word_graph.vertex(i);
080                for(j = i + 1;j < I;j++)
081                    if((weight = Utils.scaledEditSimilarity(words[i], words[j], 2)) > 0) {
082                        b = word_graph.vertex(j);
083                        a.addOutgoingEdge(b, weight);
084                        b.addOutgoingEdge(a, weight);
085                    }
086            }
087            word_graph.walk(100);
088            for(i = 0;i < I;i++) {
089                System.out.println(words[i]);
090                j = 0;
091                for(int vertex_id : word_graph.rank(i, 10000, vertex_ids))
092                    if(j++ == 5)
093                        break;
094                    else
095                        System.out.println("  " + words[vertex_id]);
096            }
097        }
098    }