Here's a quick and dirty class for geocoding through an IP rotator. It keeps using the same IP until Google tells you that you can't make any more requests, then switches to a different IP. The curl extension must be installed to use it.
/**
 * Minimal logging helper used by the geocoder.
 *
 * NOTE(review): PHP 7+ ships a built-in global class that is also named
 * Error, so this declaration collides with it on those versions. Confirm the
 * target PHP version, or rename/namespace this class together with its callers.
 */
class Error {
    /**
     * Write a message to the PHP error log, prefixed with "GEOCODER: ".
     *
     * Declared static because the callers in this file invoke it as
     * Error::log_message(); calling it on an instance keeps working too.
     *
     * @param string $message Text to log.
     * @return void
     */
    public static function log_message ( $message ) { error_log("GEOCODER: $message"); }
}
/**
 * Geocodes addresses through the Google Geocoding JSON endpoint while
 * rotating through a list of local source IPs.
 *
 * The same source IP is used until a response carries a status of
 * OVER_QUERY_LIMIT, REQUEST_DENIED or INVALID_REQUEST; the next entry of
 * $source_ips is then selected and the request is retried. Once the list is
 * used up, every further call fails fast and returns false.
 *
 * Requires the curl extension.
 */
class GoogleGeocoder {
    /** @var array Local addresses to send requests from; replace the placeholder with real ones. */
    public $source_ips = array ( "127.0.0.1" );
    /** @var int|false|null Index into $source_ips: NULL before the first selection, false once exhausted. */
    public $current_ip_index = NULL;
    /** @var string|null Source address currently in use. */
    public $current_ip = NULL;
    /** @var bool True once every entry of $source_ips has been used up. */
    public $ip_exhausted = false;
    /** @var string Endpoint the query string is appended to. */
    public $geo_url = "http://maps.google.com/maps/api/geocode/json";
    /** @var array Fixed "key=value" query parameters sent with every request. */
    public $geo_get_params = array("sensor=false");

    /**
     * Select the first source IP so the instance is ready to geocode.
     */
    public function __construct () {
        $this->change_ip();
    }

    /**
     * Advance to the next source IP.
     *
     * The first call selects index 0; later calls move one entry forward.
     * When no further entry exists the instance is marked exhausted.
     *
     * @return bool True when an IP was selected, false when the list is
     *              (or already was) exhausted.
     */
    public function change_ip ( ) {
        // Already exhausted by an earlier call: keep refusing.
        if ( $this->ip_exhausted === true ) {
            Error::log_message("All ips exhausted, please do not try any more geocodes");
            return false;
        }

        // Nothing selected yet: start at index 0. Testing the index (not the
        // IP) keeps an empty $source_ips from looking "never selected" forever.
        if ( $this->current_ip_index === NULL ) {
            $this->current_ip_index = 0;
            $this->current_ip = isset($this->source_ips[0]) ? $this->source_ips[0] : NULL;
            return true;
        }

        // Otherwise rotate until the last entry has been used.
        $next_index = $this->current_ip_index + 1;
        if ( isset($this->source_ips[$next_index]) ) {
            $this->current_ip_index = $next_index;
            $this->current_ip = $this->source_ips[$next_index];
            return true;
        }

        $this->current_ip_index = false;
        $this->ip_exhausted = true;
        return false;
    }

    /**
     * Perform an HTTP request with curl from the currently selected source IP.
     *
     * Sleeps 100 ms after every request as a crude rate limit.
     *
     * @param string            $url      Full request URL.
     * @param string|array|null $postdata When not NULL the request is sent as a POST with this body.
     * @param bool              $xml      When true a "Content-Type: text/xml" header is added.
     * @return string|false Response body, or false on failure / exhausted IPs.
     */
    public function send_request_via_curl($url,$postdata=NULL,$xml=true) {
        if ( $this->ip_exhausted ) {
            Error::log_message("All ips exhausted, please do not try any more geocodes");
            return false;
        }

        $ch = curl_init();
        if ( $ch === false ) {
            return false;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

        // Bind the outgoing connection to the selected source address so that
        // rotating actually changes the IP the remote side sees. The shipped
        // placeholder 127.0.0.1 is skipped: a loopback source cannot reach an
        // external host, so the default route is used for it instead.
        if ( ! empty($this->current_ip) && $this->current_ip !== "127.0.0.1" ) {
            curl_setopt($ch, CURLOPT_INTERFACE, $this->current_ip);
        }

        if ( $xml ) {
            curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-Type: text/xml"));
        }
        if ( ! is_null($postdata) ) {
            // CURLOPT_POST must be enabled; setting it to 0 resets the request to a GET.
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postdata);
        }
        // NOTE(review): peer verification is disabled. The configured endpoint
        // is plain http, so this has no effect there; revisit before using https.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

        $response = curl_exec($ch);
        usleep(100000);
        return $response;
    }

    /**
     * Geocode an address, switching source IP and retrying when Google refuses.
     *
     * @param string|array $address Free-form address; array parts are joined with ", ".
     * @param string       $country Optional region bias code ("gb" is sent as "uk").
     * @return array|false Parsed result (see parse_geocode_response()), or
     *                     false on empty input, transport/decoding failure or
     *                     when all source IPs are exhausted.
     */
    public function geocode ( $address, $country="" ) {
        if ( empty($address) ) {
            Error::log_message("Empty address");
            return false;
        }
        if ( $this->ip_exhausted ) {
            Error::log_message("All ips exhausted, please do not try any more geocodes");
            return false;
        }
        if ( is_array($address) ) {
            $address = join(', ', $address);
        }

        // Build the URL once. Only URL-encoding is applied: HTML escaping would
        // corrupt characters such as "&" in the address.
        $get_params = $this->geo_get_params;
        $get_params[] = "address=" . urlencode($address);
        if ( ! empty($country) ) {
            if ( $country == "gb" ) { $country = "uk"; }
            $get_params[] = "region=" . urlencode($country);
        }
        $url = $this->geo_url . "?" . join('&', $get_params);

        // NOTE(review): INVALID_REQUEST is kept as a rotation trigger, as in
        // the original; confirm that status is really tied to the source IP.
        $rotate_on = array("OVER_QUERY_LIMIT", "REQUEST_DENIED", "INVALID_REQUEST");

        // Retry with the next IP until a usable answer arrives or the list
        // runs out; change_ip() bounds the number of iterations.
        while ( true ) {
            $response = $this->send_request_via_curl($url);
            if ( false === $response ) {
                Error::log_message("curl $url returned false!");
                return false;
            }

            // json_decode() signals failure with NULL, not false.
            $result = json_decode($response, true);
            if ( ! is_array($result) ) {
                Error::log_message("could not json_decode() result");
                return false;
            }

            $status = isset($result["status"]) ? $result["status"] : "";
            if ( in_array($status, $rotate_on, true) ) {
                Error::log_message("Got response " . $status . ", switching IP's");
                if ( false === $this->change_ip() ) {
                    Error::log_message("change_ip returned false, so geocode() is exiting");
                    return false;
                }
                continue;
            }

            return $this->parse_geocode_response($result);
        }
    }

    /**
     * Flatten a decoded geocoder response into a single-level array.
     *
     * "geo_stat" is always present: "200" when the status is "OK" and a first
     * result exists, "404" otherwise. For a "200" the first result supplies
     * geocoder_source, geo_lat, geo_lon, geo_resolution, addr_street and
     * whichever of addr_number, addr_street_name, addr_city, addr_state and
     * addr_zip appear among its address components.
     *
     * @param array $json Response as returned by json_decode($body, true).
     * @return array
     */
    public function parse_geocode_response ( $json ) {
        $result = array();
        $found = isset($json["status"]) && $json["status"] == "OK" && isset($json["results"][0]);
        $result["geo_stat"] = $found ? "200" : "404";
        if ( ! $found ) {
            return $result;
        }

        $active_record = $json["results"][0];
        $geometry = isset($active_record["geometry"]) ? $active_record["geometry"] : array();

        $result["geocoder_source"] = "google";
        $result["geo_lat"] = isset($geometry["location"]["lat"]) ? $geometry["location"]["lat"] : NULL;
        $result["geo_lon"] = isset($geometry["location"]["lng"]) ? $geometry["location"]["lng"] : NULL;
        $result["geo_resolution"] = isset($geometry["location_type"]) ? $geometry["location_type"] : NULL;

        // Primary component type => (output key, which name variant to copy).
        $component_map = array(
            'street_number'               => array("addr_number", "long_name"),
            'route'                       => array("addr_street_name", "long_name"),
            'locality'                    => array("addr_city", "long_name"),
            'administrative_area_level_1' => array("addr_state", "short_name"),
            'postal_code'                 => array("addr_zip", "long_name"),
        );

        $components = array();
        if ( isset($active_record["address_components"]) && is_array($active_record["address_components"]) ) {
            $components = $active_record["address_components"];
        }
        foreach ( $components as $info ) {
            // Only the first listed type of a component is considered.
            if ( ! isset($info["types"][0]) || ! is_string($info["types"][0]) ) { continue; }
            $type = $info["types"][0];
            if ( ! isset($component_map[$type]) ) { continue; }
            list($field, $name_key) = $component_map[$type];
            if ( isset($info[$name_key]) ) {
                $result[$field] = $info[$name_key];
            }
        }

        $result["addr_street"] = isset($result["addr_street_name"]) ? $result["addr_street_name"] : NULL;
        return $result;
    }
}
/* Example Usage:
$record["text_address"]="1600 Pennsylvania Ave., Washington DC";
$geocoder=new GoogleGeocoder();
if ( false !== ($geo_info=$geocoder->geocode($record["text_address"])) ) {
$record=array_merge($record,$geo_info);
} else {
echo "Stop geocoding, google cut you off!\n";
}
print_r($record);
Array(
["text_address"] => "1600 Pennsylvania Ave., Washington DC",
["geocoder_source"] => "google",
["geo_lat"] => "x.xxx",
["geo_lon"] => "x.xxx",
["geo_stat"] => "200", // HTTP status codes, 404 means address not found
["geo_resolution"] => "ROOFTOP",
["addr_number"] => 1600,
["addr_street_name"] => "Pennsylvania Ave",
["addr_city"] => "Washington",
["addr_state"] => "DC", // I'm just guessing here, maybe MD?
["addr_zip"] => "12345"
);
*/
?>