// Copyright 2012, 2013 Brad Block, Pawjaw, LLC. (an Ohio Limited Liability Company)
//
// This file is part of JFPPR.
//
// JFPPR is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// JFPPR is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with JFPPR. If not, see <http://www.gnu.org/licenses/>.

package com.pawjaw.graph.fppr;

import java.util.Arrays;
import java.util.List;
import java.util.Random;

/**
 * A graph structure that supports Fully Personalized PageRank by sampling
 * random walks.
 * <p>Typical usage:
 * <ol><li>Create a Graph, giving the number of {@link Vertex} elements it
 * holds.</li><li>Declare every outgoing edge with
 * {@link Vertex#addOutgoingEdge(Vertex, float)}.</li><li>Once all edges are
 * in place, pre-compute random walk segments with {@link #walk(int)}.</li>
 * <li>For each Vertex whose Personalized PageRank is wanted, call
 * {@link #rank(int,int,List)}.</li>
 * </ol>
 */
public final class Graph {
    private Random rng = new Random();
    private Vertex[] vertices, sorted_vertices;

    /**
     * The probability of ending a random walk segment and either resetting
     * to a source vertex or jumping to another random vertex.
     */
    public static final float RESET_PROBABILITY = 0.2f;

    /**
     * Creates the graph and all of its vertices, with ids running from zero
     * to {@code vertex_count} (exclusive).
     *
     * @param vertex_count number of {@link Vertex} elements in the graph
     */
    public Graph(int vertex_count) {
        vertices = new Vertex[vertex_count];
        sorted_vertices = new Vertex[vertex_count];
        for (int id = 0; id < vertex_count; id++) {
            // The same Vertex instance is shared by both arrays: one stays
            // indexed by id, the other is reordered by rank(...).
            Vertex created = new Vertex(id);
            vertices[id] = created;
            sorted_vertices[id] = created;
        }
    }

    /**
     * Looks up a vertex by id.
     *
     * @param vertex_id the id of the vertex to retrieve, from zero to the
     * number of {@link Vertex} elements given to the constructor (exclusive).
     * @return the {@link Vertex} element with the specified id.
     */
    public Vertex vertex(int vertex_id) {
        return vertices[vertex_id];
    }

    /**
     * @return the number of {@link Vertex} elements given to the
     * constructor.
     */
    public int vertices() {
        return vertices.length;
    }

    /**
     * Simulates random walks, starting the requested number of walks from
     * every vertex in turn.
     *
     * @param walks_per_vertex the number of random walks to start from each
     * Vertex. This number should be proportional to the logarithm of the
     * number of vertices; for graphs that fit in main memory it will
     * probably not need to exceed 10 or 15. A walk is a succession of
     * weighted random transitions along outgoing edges that continues until
     * a reset is encountered.
     *
     * @see #RESET_PROBABILITY
     */
    public void walk(int walks_per_vertex) {
        for (int round = 0; round < walks_per_vertex; round++) {
            for (Vertex origin : vertices) {
                walk(origin);
            }
        }
    }

    /**
     * Records one walk segment beginning at the given vertex. Every
     * {@link Step} that is created registers itself with the vertex it
     * stands on, so nothing needs to be returned.
     */
    private void walk(Vertex start) {
        Step tail = new Step(start);
        // Keep extending the chain until the reset draw comes up.
        while (rng.nextFloat() > RESET_PROBABILITY) {
            Step appended = new Step(tail.vertex.sampleNeighbor());
            tail.next_step = appended;
            tail = appended;
        }
    }

    /**
     * Produces a ranking of the nearest neighbors of a source vertex
     * according to a Personalized PageRank of this Graph, nearest first.
     *
     * @param start_vertex_id the id of the source {@link Vertex} for which to
     * perform the Personalized PageRank and obtain neighbors, from zero to
     * the number of vertices given to the constructor (exclusive)
     * @param walk_length the total number of walk steps taken. A reset to
     * the source {@link Vertex} counts as a step. Replaying a stored walk
     * segment, together with the reset that follows it, counts as a single
     * step; the vertices visited along that segment are not counted
     * individually. A reasonable number for this is on the order of 10,000.
     * @param result a user-specified destination for the ranked
     * {@link Vertex} ids; it is cleared first and then filled so that the
     * nearest neighbor comes first. The source vertex and vertices that were
     * never visited are left out.
     *
     * @return the user-specified destination (returned for convenience).
     */
    public List<Integer> rank(int start_vertex_id, int walk_length, List<Integer> result) {
        Vertex source = vertices[start_vertex_id];
        result.clear();
        for (Vertex v : vertices) {
            v.reset();
        }

        // The initial visit to the source is step number one.
        Vertex current = source;
        current.visit();
        for (int taken = 1; taken < walk_length; taken++) {
            boolean back_to_source;
            if (rng.nextFloat() <= RESET_PROBABILITY) {
                back_to_source = true;
            } else {
                Step segment = current.nextSegmentStart();
                if (segment == null) {
                    // No stored segment to replay here: take a single
                    // freshly sampled transition instead.
                    current = current.sampleNeighbor();
                    current.visit();
                    back_to_source = false;
                } else {
                    // The head of the segment is skipped; only the steps
                    // after it are visited.
                    for (Step s = segment.next_step; s != null; s = s.next_step) {
                        s.vertex.visit();
                    }
                    // The end of a stored segment is followed by a reset.
                    back_to_source = true;
                }
            }
            if (back_to_source) {
                current = source;
                current.visit();
            }
        }

        // Uses Vertex's natural ordering. NOTE(review): presumably most-visited
        // first, since the scan below stops at the first unvisited vertex --
        // confirm against Vertex.compareTo.
        Arrays.sort(sorted_vertices);
        for (Vertex v : sorted_vertices) {
            if (v.visits() == 0) {
                break;
            }
            if (v.id != start_vertex_id) {
                result.add(v.id);
            }
        }
        return result;
    }

    /**
     * One link in a stored random walk segment: the vertex the walk is on
     * and the step that follows it, or {@code null} if this is the last step
     * of the segment.
     */
    protected static class Step {
        Vertex vertex;
        Step next_step = null;

        /**
         * Creates a step located at {@code v} and registers it with that
         * vertex through {@code v.addSegment(this)}.
         */
        Step(Vertex v) {
            vertex = v;
            v.addSegment(this);
        }
    }
}