Solr.php
Go to the documentation of this file.
<?php

/**
 * Minimal HTTP client for a Solr instance.
 *
 * Builds the update, select and Luke URLs for a (possibly unnamed) core and
 * offers helpers to send XML update messages, run select queries, and keep a
 * serialized list of the field names known to the index.
 */
class Solr
{
    /**
     * Name of the file (inside $fieldIndexFolder) that caches the list of
     * index fields. Shared by getFieldsIndex() and updateFieldsIndex() so the
     * reader and the writer can never disagree on the file name.
     */
    const FIELD_INDEX_FILE = "fieldsIndex.srz";

    /** URL of the Solr update handler. */
    private $updateUrl;

    /** URL of the Solr select handler. */
    private $selectUrl;

    /** URL of the Solr Luke handler (queried with numTerms=0). */
    private $lukeUrl;

    /** Folder (with trailing separator) where the field index cache is stored. */
    private $fieldIndexFolder;

    /**
     * Class hierarchy object made available by
     * "ontologies/classHierarchySerialized.php", cached for the whole request.
     */
    private static $classHierarchy = NULL;

    /**
     * @param string $core             Name of the Solr core; empty for the default core.
     * @param string $host             Host name of the Solr server.
     * @param string $port             Port of the Solr server.
     * @param string $fieldIndexFolder Folder used to store the field index cache;
     *                                 it is concatenated directly with the file
     *                                 name, so it must end with a path separator.
     */
    function __construct($core = "", $host = "localhost", $port = "8983", $fieldIndexFolder = "/tmp/")
    {
        // A named core adds one path segment; everything else is identical.
        $baseUrl = "http://$host:$port/solr" . ($core != "" ? "/$core" : "");

        $this->updateUrl = $baseUrl . "/update";
        $this->selectUrl = $baseUrl . "/select";
        $this->lukeUrl = $baseUrl . "/admin/luke?numTerms=0";

        $this->fieldIndexFolder = $fieldIndexFolder;
    }

    function __destruct() { }

    /**
     * Sends a query to the select handler.
     *
     * @param string $query Request body posted to the select URL.
     *
     * @return string|false The raw response body, or FALSE on a transport error.
     */
    public function select($query)
    {
        return $this->sendQuery($query);
    }

    /**
     * Sends an XML update message to the update handler.
     *
     * @param string $content XML update message.
     *
     * @return bool TRUE if Solr answered with status 0, FALSE otherwise.
     */
    public function update($content)
    {
        return $this->sendContent($content);
    }

    /**
     * Sends a commit message to the update handler.
     *
     * @return bool TRUE if Solr answered with status 0, FALSE otherwise.
     */
    public function commit()
    {
        return $this->sendContent("<commit />");
    }

    /**
     * Deletes the document whose id is md5($dataset . $uri).
     *
     * @param string $uri     URI of the record.
     * @param string $dataset URI of the dataset that contains the record.
     *
     * @return bool TRUE if Solr answered with status 0; FALSE otherwise,
     *              including when $uri or $dataset is empty (no request is
     *              sent in that case).
     */
    public function deleteInstanceRecord($uri, $dataset)
    {
        if($uri == "" || $dataset == "")
        {
            // Explicit failure value instead of an implicit NULL.
            return FALSE;
        }

        return $this->sendContent("<delete><id>" . md5($dataset . $uri) . "</id></delete>");
    }

    /**
     * Sends an optimize message to the update handler.
     *
     * @return bool TRUE if Solr answered with status 0, FALSE otherwise.
     */
    public function optimize()
    {
        return $this->sendContent("<optimize />");
    }

    /**
     * Deletes every document of the index (delete by query "*:*").
     *
     * @return bool TRUE if Solr answered with status 0, FALSE otherwise.
     */
    public function flushIndex()
    {
        return $this->sendContent("<delete><query>*:*</query></delete>");
    }

    /**
     * Deletes every document whose "dataset" field matches the given value.
     *
     * NOTE(review): $dataset is inserted verbatim into both the XML message
     * and the quoted query; callers must make sure it contains no XML special
     * characters or double quotes.
     *
     * @param string $dataset Dataset value to match.
     *
     * @return bool TRUE if Solr answered with status 0, FALSE otherwise.
     */
    public function flushDataset($dataset)
    {
        return $this->sendContent("<delete><query>dataset:\"$dataset\"</query></delete>");
    }

    /**
     * Posts a query to the select URL.
     *
     * No Content-Type header is set here, so cURL's default for POST bodies
     * applies.
     *
     * @param string $query Request body.
     *
     * @return string|false The raw response body, or FALSE on a transport error.
     */
    private function sendQuery($query)
    {
        return $this->httpPost($this->selectUrl, $query);
    }

    /**
     * Converts a web service XML document into Solr "add" messages.
     *
     * One "<add><doc>…</doc></add>" string is produced per subject. Each one
     * contains the subject URI, its type (when not empty), a property/text
     * pair for every object typed "rdfs:Literal", and one inferred_type field
     * per super class returned by the class hierarchy for the subject type.
     *
     * Depends on the project-level ProcessorXML class and get_label_uri()
     * function, which are defined outside of this file.
     *
     * @param string $wsElement XML document to convert.
     *
     * @return array List of XML "add" messages (strings).
     */
    public function createSolrAddElementFromWSElement($wsElement)
    {
        $xml = new ProcessorXML();
        $xml->loadXML($wsElement);

        $subjects = $xml->getSubjects();

        $adds = array();

        $classHierarchy = $this->loadClassHierarchy();

        foreach($subjects as $subject)
        {
            $types = array();
            $subjectURI = $xml->getURI($subject);
            $subjectType = @$xml->getType($subject);

            array_push($types, get_label_uri($subjectType));

            $add = "<add><doc><field name=\"uri\">" . get_label_uri($subjectURI) . "</field>";

            if($subjectType != "")
            {
                $add .= "<field name=\"type\">" . get_label_uri($subjectType) . "</field>";
            }

            $predicates = $xml->getPredicates($subject);

            foreach($predicates as $predicate)
            {
                $objects = $xml->getObjects($predicate);

                foreach($objects as $object)
                {
                    @$objectType = $xml->getType($object);
                    $predicateType = $xml->getType($predicate);

                    // Only literal values are indexed as property/text pairs.
                    if($objectType == "rdfs:Literal")
                    {
                        $objectValue = $xml->getContent($object);

                        $add .= "<field name=\"property\">" . get_label_uri($predicateType) . "</field>";
                        $add .= "<field name=\"text\">" . $this->xmlEncode($objectValue) . "</field>";
                    }
                }
            }

            // Get all types by inference
            foreach($types as $type)
            {
                $superClasses = $classHierarchy->getSuperClasses($type);

                foreach($superClasses as $sc)
                {
                    $add .= "<field name=\"inferred_type\">" . $this->xmlEncode($sc->name) . "</field>";
                }
            }

            $add .= "</doc></add>";

            array_push($adds, $add);
        }

        return ($adds);
    }

    /**
     * Returns the class hierarchy used for type inference.
     *
     * include_once only evaluates the file the first time it is requested, so
     * the $classHierarchy variable is only created in the scope of that first
     * include. The object is therefore cached in a static property so that
     * later calls in the same request can still reach it.
     *
     * Presumably "ontologies/classHierarchySerialized.php" assigns a local
     * variable named $classHierarchy - this is inferred from how the variable
     * is used by the caller and should be confirmed against that file.
     *
     * @return object|null The class hierarchy, or NULL if the include did not
     *                     define it.
     */
    private function loadClassHierarchy()
    {
        if(self::$classHierarchy === NULL)
        {
            include_once("ontologies/classHierarchySerialized.php");

            if(isset($classHierarchy))
            {
                self::$classHierarchy = $classHierarchy;
            }
        }

        return self::$classHierarchy;
    }

    // Disabled implementation of createUpdateSolrDocument(), kept as found.
    /*
    public function createUpdateSolrDocument($solrDocument)
    {
        if($solrDocument->uri == "")
        {
            return(FALSE);
        }

        // If there is no "inferred_type" defined for this solrDocument, we try to find some.
        if(count($solrDocument->inferredTypes) <= 0)
        {
            include_once("ontologies/classHierarchySerialized.php");

            foreach($solrDocument->types as $type)
            {
                $superClasses = $classHierarchy->getSuperClasses($type);

                foreach($superClasses as $sc)
                {
                    $solrDocument->addInferredType($sc->name);
                }
            }
        }

        // If there is no object_property/object_label pairs defined for this document; we try to find some.
        if(count($solrDocument->objectPropertiesLabels) <= 0)
        {
            global $dbUsername, $dbPassword, $dbDSN, $dbHost;
            global $base_url;

            include_once("WebService.php");

            $data_ini = parse_ini_file(WebService::$data_ini."data.ini", TRUE);

            $this->db = new DB_Virtuoso($data_ini["triplestore"]["username"], $data_ini["triplestore"]["password"], $data_ini["triplestore"]["dsn"], $data_ini["triplestore"]["host"]);


            $db = new DB_Virtuoso($dbUsername, $dbPassword, $dbDSN, $dbHost);

            $query = $db->build_sparql_query("select ?p ?o (str(DATATYPE(?o))) as ?otype from <".get_domain($base_url)."/data/core/> where {<".$solrDocument->uri."> ?p ?o.}", array ('p', 'o', 'otype'), FALSE);

            $resultset = $db->query($query);

            while(odbc_fetch_row($resultset))
            {
                $property = odbc_result($resultset, 1);
                $object = odbc_result($resultset, 2);
                $otype = odbc_result($resultset, 3);

                if($otype == "" && strpos($property, "http://www.w3.org/1999/02/22-rdf-syntax-ns#") === FALSE && strpos($property, "http://www.w3.org/2000/01/rdf-schema#") === FALSE && strpos($property, "http://www.w3.org/2002/07/owl#") === FALSE)
                {
                    $query = $db->build_sparql_query("select ?p ?o from <".get_domain($base_url)."/data/core/> where {<$object> ?p ?o.}", array ('p', 'o'), FALSE);

                    $resultset2 = $db->query($query);

                    $subjectTriples = array();

                    while(odbc_fetch_row($resultset2))
                    {
                        $p = odbc_result($resultset2, 1);
                        $o = odbc_result($resultset2, 2);

                        if(!isset($subjectTriples[$p]))
                        {
                            $subjectTriples[$p] = array();
                        }

                        array_push($subjectTriples[$p], $o);
                    }

                    unset($resultset2);

                    $labels = "";
                    foreach($labelProperties as $property)
                    {
                        if(isset($subjectTriples[$property]))
                        {
                            $labels = $subjectTriples[$property][0]." ";
                        }
                    }

                    if($labels != "")
                    {
                        $solrDocument->addObjectPropertyLabel(array($property, $labels));
                    }
                    else
                    {
                        $solrDocument->addObjectPropertyLabel(array($property, "-"));
                    }
                }
            }

            unset($resultset);

            $db->close();
            $this->update("<add>".$solrDocument->serializeSolrDocument()."</add>");
        }
    } */

    /**
     * Encodes a string so it can be embedded in an XML text node.
     *
     * Backslashes become "%5C"; "&", "<" and ">" become their XML entities.
     * str_replace applies the pairs in order, so "&" is escaped before the
     * entities for "<" and ">" are introduced and nothing is escaped twice.
     *
     * @param string $string Text to encode.
     *
     * @return string The encoded text.
     */
    public function xmlEncode($string)
    {
        return str_replace(array ("\\", "&", "<", ">"), array ("%5C", "&amp;", "&lt;", "&gt;"), $string);
    }

    /**
     * Posts an XML message to the update URL and checks Solr's status.
     *
     * @param string $content XML update message.
     *
     * @return bool TRUE when the response contains '<int name="status">0</int>',
     *              FALSE on a transport error or any other response.
     */
    private function sendContent($content)
    {
        $response = $this->httpPost($this->updateUrl, $content, array( "Content-Type: text/xml" ), TRUE);

        if($response === FALSE)
        {
            return FALSE;
        }

        return strpos($response, '<int name="status">0</int>') !== FALSE;
    }

    /**
     * Returns the list of field names known to the index.
     *
     * The list is read from the cache file; if that file does not exist it is
     * first created with updateFieldsIndex().
     *
     * @return array|false The list of field names, or FALSE if the cache file
     *                     could not be created or read.
     */
    public function getFieldsIndex()
    {
        $indexFile = $this->fieldIndexFolder . self::FIELD_INDEX_FILE;

        if(!file_exists($indexFile))
        {
            // Force the creation of the index if the file is not existing
            if(!$this->updateFieldsIndex())
            {
                return FALSE;
            }
        }

        $serialized = file_get_contents($indexFile);

        if($serialized === FALSE)
        {
            return FALSE;
        }

        return (unserialize($serialized));
    }

    /**
     * Rebuilds the field index cache from the Luke handler.
     *
     * Collects the "name" attribute of every <lst> found under
     * <lst name="fields"> in the Luke response, removes duplicates and stores
     * the serialized list in the cache file.
     *
     * @return bool TRUE if the cache file was written; FALSE on a transport
     *              error, an empty or unparsable response, or a failed write.
     */
    public function updateFieldsIndex()
    {
        // The request carries no payload; an explicit empty body is sent.
        $response = $this->httpPost($this->lukeUrl, "", array( "Content-Type: text/xml" ), TRUE);

        if($response === FALSE || $response === "")
        {
            return FALSE;
        }

        $domResultset = new DOMDocument("1.0", "utf-8");

        if(!$domResultset->loadXML($response))
        {
            // Do not cache an empty index built from an unparsable response.
            return FALSE;
        }

        $xpath = new DOMXPath($domResultset);

        $fields = array();

        foreach($xpath->query("//*/lst[@name='fields']//lst") as $found)
        {
            array_push($fields, $found->getAttribute("name"));
        }

        $fields = array_unique($fields);

        $written = file_put_contents($this->fieldIndexFolder . self::FIELD_INDEX_FILE, serialize($fields));

        return $written !== FALSE;
    }

    /**
     * Performs an HTTP POST with cURL and returns the response body.
     *
     * @param string $url            Target URL.
     * @param string $body           Request body.
     * @param array  $headers        Extra HTTP headers; none are set when empty.
     * @param bool   $followLocation Whether redirects are followed.
     *
     * @return string|false The response body, or FALSE if cURL reported an error.
     */
    private function httpPost($url, $body, array $headers = array(), $followLocation = FALSE)
    {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_HEADER, 0);

        if(count($headers) > 0)
        {
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

        if($followLocation)
        {
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        }

        $data = curl_exec($ch);

        if(curl_errno($ch))
        {
            return FALSE;
        }

        return $data;
    }
}


/**
 * In-memory description of one Solr document: a URI, its types and inferred
 * types, property/text pairs and object property/label pairs.
 */
class SolrDocument
{
    /** URI of the document. */
    public $uri;

    /** List of types of the document. */
    public $types;

    /** List of inferred types of the document. */
    public $inferredTypes;

    /** List of array(object property, label) pairs. */
    public $objectPropertiesLabels;

    /** List of array(property, text) pairs. */
    public $propertiesTexts;

    /**
     * @param string $uri                    URI of the document.
     * @param array  $types                  Initial list of types.
     * @param array  $inferredTypes          Initial list of inferred types.
     * @param array  $objectPropertiesLabels Initial list of array(object property, label) pairs.
     * @param array  $propertiesTexts        Initial list of array(property, text) pairs.
     */
    function __construct($uri = "", $types = array(), $inferredTypes = array(), $objectPropertiesLabels = array(),
        $propertiesTexts = array())
    {
        $this->uri = $uri;
        $this->types = $types;
        $this->inferredTypes = $inferredTypes;
        $this->objectPropertiesLabels = $objectPropertiesLabels;
        $this->propertiesTexts = $propertiesTexts;
    }

    function __destruct() { }

    /**
     * Appends an object property/label pair.
     *
     * @param array $propertyLabel array(object property, label).
     */
    public function addObjectPropertyLabel($propertyLabel)
    {
        array_push($this->objectPropertiesLabels, $propertyLabel);
    }

    /**
     * Appends a property/text pair.
     *
     * @param array $propertyText array(property, text).
     */
    public function addPropertyText($propertyText)
    {
        array_push($this->propertiesTexts, $propertyText);
    }

    /**
     * Appends a type.
     *
     * @param string $type Type to add.
     */
    public function addType($type)
    {
        array_push($this->types, $type);
    }

    /**
     * Appends an inferred type.
     *
     * @param string $inferredType Inferred type to add.
     */
    public function addInferredType($inferredType)
    {
        array_push($this->inferredTypes, $inferredType);
    }

    /**
     * Serializes the document as a Solr "<doc>" XML element.
     *
     * Fields are emitted in this order: uri, type, inferred_type,
     * property/text pairs, object_property/object_label pairs. The uri and
     * type values are written as-is; every other value is passed through
     * xmlEncode().
     *
     * @return string The "<doc>…</doc>" element.
     */
    public function serializeSolrDocument()
    {
        $serialization = "<doc>";

        $serialization .= "<field name=\"uri\">" . $this->uri . "</field>";

        foreach($this->types as $type)
        {
            $serialization .= "<field name=\"type\">" . $type . "</field>";
        }

        foreach($this->inferredTypes as $inferredType)
        {
            // Encoded to agree with Solr::createSolrAddElementFromWSElement().
            $serialization .= "<field name=\"inferred_type\">" . $this->xmlEncode($inferredType) . "</field>";
        }

        foreach($this->propertiesTexts as $propertyText)
        {
            $serialization .= "<field name=\"property\">" . $this->xmlEncode($propertyText[0]) . "</field>";
            $serialization .= "<field name=\"text\">" . $this->xmlEncode($propertyText[1]) . "</field>";
        }

        foreach($this->objectPropertiesLabels as $propertyLabel)
        {
            $serialization .= "<field name=\"object_property\">" . $this->xmlEncode($propertyLabel[0]) . "</field>";
            $serialization .= "<field name=\"object_label\">" . $this->xmlEncode($propertyLabel[1]) . "</field>";
        }

        $serialization .= "</doc>";

        return ($serialization);
    }

    /**
     * Encodes a string so it can be embedded in an XML text node.
     *
     * Backslashes become "%5C"; "&", "<" and ">" become their XML entities.
     * str_replace applies the pairs in order, so "&" is escaped before the
     * entities for "<" and ">" are introduced and nothing is escaped twice.
     *
     * @param string $string Text to encode.
     *
     * @return string The encoded text.
     */
    public function xmlEncode($string)
    {
        return str_replace(array ("\\", "&", "<", ">"), array ("%5C", "&amp;", "&lt;", "&gt;"), $string);
    }
}
