001    // Copyright 2012, 2013 Brad Block, Pawjaw, LLC. (an Ohio Limited Liability Company)
002    // 
003    // This file is part of JFPPR.
004    // 
005    // JFPPR is free software: you can redistribute it and/or modify
006    // it under the terms of the GNU General Public License as published by
007    // the Free Software Foundation, either version 3 of the License, or
008    // (at your option) any later version.
009    // 
010    // JFPPR is distributed in the hope that it will be useful,
011    // but WITHOUT ANY WARRANTY; without even the implied warranty of
012    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013    // GNU General Public License for more details.
014    // 
015    // You should have received a copy of the GNU General Public License
016    // along with JFPPR.  If not, see <http://www.gnu.org/licenses/>.
017    
018    package com.pawjaw.graph.fppr;
019    
020    import java.util.Arrays;
021    import java.util.List;
022    import java.util.Random;
023    
024    /**
025     * This is a structure for representing a graph capable of performing Fully
026     * Personalized PageRank efficiently through random walk sampling.
027     * <p>This structure can be used as follows:
028     * <ol><li>A Graph instance is instantiated with the number of {@link Vertex}
029     * elements in the graph.</li><li>Following this, all outgoing edges are
030     * specified with {@link Vertex#addOutgoingEdge(Vertex, float)}.</li><li>After
031     * the edges are set, random walks are simulated on the graph using the
032     * {@link #walk(int)} method.</li><li>Finally, for Vertex elements for which
033     * a Personalized PageRank is desired, the {@link #rank(int,int,List)}
034     * is used.</li>
035     * </ol>
036     */
037    public final class Graph {
038        private Random r = new Random();
039        private Vertex[] vertices, sorted_vertices;
040    
041        /**
042         * The probability of ending a random walk segment and either resetting
043         * to a source vertex or jumping to another random vertex.
044         */
045        public static final float RESET_PROBABILITY = 0.2f;
046    
047        /**
048         * @param vertex_count number of {@link Vertex} elements in the graph
049         */
050        public Graph(int vertex_count) {
051            int i;
052            vertices = new Vertex[vertex_count];
053            sorted_vertices = new Vertex[vertex_count];
054            for(i = 0;i < vertex_count;i++)
055                sorted_vertices[i] = vertices[i] = new Vertex(i);
056        }
057    
058        /**
059         * @param vertex_id the id of the vertex to retrieve.  Starting at zero
060         * and going to the number of {@link Vertex} elements specified in the
061         * constructor (exclusive).
062         * @return the {@link Vertex} element corresponding to the specified vertex
063         * id.
064         */
065        public Vertex vertex(int vertex_id) {
066            return vertices[vertex_id];
067        }
068    
069        /**
070         * @return the number of {@link Vertex} elements specified in the
071         * constructor.
072         */
073        public int vertices() {
074            return vertices.length;
075        }
076    
077        /**
078         * Simulate random walks.
079         *
080         * @param walks_per_vertex the number of random walks per Vertex to take.
081         * This number should be proportional to the logarithm of the number of
082         * vertices.  For graphs that will fit in main memory, this number will
083         * probably not need exceed 10 or 15.  A walk is a successive number of
084         * weighted random transitions following outgoing edges until a reset
085         * is encountered.
086         *
087         * @see #RESET_PROBABILITY
088         */
089        public void walk(int walks_per_vertex) {
090            int i, I = vertices.length, w;
091            for(w = 0;w < walks_per_vertex;w++)
092                for(i = 0;i < I;i++)
093                    walk(vertices[i]);
094        }
095    
096        private void walk(Vertex start) {
097            Step last_step = new Step(start);
098            while(r.nextFloat() > RESET_PROBABILITY)
099                last_step = last_step.next_step = new Step(last_step.vertex.sampleNeighbor());
100        }
101    
102        /**
103         * Obtain a descending ranking of nearest neighbors according to a
104         * Personalized PageRank of this Graph.
105         *
106         * @param start_vertex_id the id of the source {@link Vertex} for which to
107         * perform the Personalized PageRank and obtain neighbors.  This number goes
108         * from zero to the number of vertices specified in the constructor
109         * (exclusive)
110         * @param walk_length the total number of walk steps taken.  Resets to the
111         * {@link Vertex} specified by the start_vertex_id are also counted as a
112         * walk step.  A reasonable number for this is on the order of 10,000.
113         * @param result a user-specified destination for storing the ranked
114         * {@link Vertex} ids such that the nearest neighbor will be stored first.
115         *
116         * @return the user-specified destination for storing the ranked
117         * {@link Vertex} ids (returned for convenience).
118         */
119        public List<Integer> rank(int start_vertex_id, int walk_length, List<Integer> result) {
120            int i, I = vertices.length, steps = 1;
121            Step step;
122            Vertex last_vertex = vertices[start_vertex_id];
123            result.clear();
124            for(i = 0;i < I;i++)
125                vertices[i].reset();
126            last_vertex.visit();
127            while(steps++ < walk_length)
128                if(r.nextFloat() <= RESET_PROBABILITY)
129                    (last_vertex = vertices[start_vertex_id]).visit();
130                else {
131                    if((step = last_vertex.nextSegmentStart()) != null) {
132                        while((step = step.next_step) != null)
133                            step.vertex.visit();
134                        (last_vertex = vertices[start_vertex_id]).visit();
135                    } else
136                        (last_vertex = last_vertex.sampleNeighbor()).visit();
137                }
138            Arrays.sort(sorted_vertices);
139            for(Vertex v : sorted_vertices)
140                if(v.visits() == 0)
141                    break;
142                else if(v.id != start_vertex_id)
143                    result.add(v.id);
144            return result;
145        }
146    
147        protected static class Step {
148            Vertex vertex;
149            Step next_step = null;
150    
151            Step(Vertex v) {
152                vertex = v;
153                v.addSegment(this);
154            }
155        }
156    }