"\\\\",
        "\"" => "\\\"",
        "\b" => "\\b",
        "\f" => "\\f",
        "\n" => "\\n",
        "\r" => "\\r",
        "\t" => "\\t",
    );
    $text = str_replace(array_keys($encoding), array_values($encoding), $text);

    // convert remaining ASCII control characters to json unicode escapes
    // NOTE(review): create_function() is deprecated since PHP 7.2 and removed
    // in PHP 8.0; this callback has to become a closure before upgrading.
    $text = preg_replace_callback("/[\\x00-\\x1f\\x7f]/", create_function(
        '$s', 'return sprintf("\\u%04x", ord($s[0]));'), $text);

    return $text;
}

/**
 * Fetch a URL with cURL and return the response body.
 *
 * Only the connection phase is bounded (5 seconds); no overall transfer
 * timeout is set.
 *
 * @param string $url URL to request.
 * @return string|false Response body, or false when the handle cannot be
 *                      created or the transfer fails.
 */
function curl_get_content($url)
{
    $ch = curl_init();
    if ($ch === false) {
        // Without a handle every following curl_* call would fail.
        return false;
    }

    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}

// ---------------------------------------------------------------------------
// Export the 200 most recent microblog posts from the ARC2 store as a
// JSON-like "items" document (one object per post, with unquoted keys).
// ---------------------------------------------------------------------------

$store = ARC2::getStore($arc_config);
if (!$store->isSetUp()) {
    $store->setUp();
}

// Every PREFIX needs its namespace IRI; a bare "PREFIX sioc:" is a SPARQL
// syntax error. Both spellings of the post class are matched on purpose.
$q = "
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX sioct: <http://rdfs.org/sioc/types#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dct: <http://purl.org/dc/terms/>
select distinct ?post ?date ?content ?maker ?name ?depiction where {
  { ?post rdf:type sioct:MicroblogPost } union { ?post rdf:type sioct:MicroBlogPost }
  ?post foaf:maker ?maker ;
        sioc:content ?content ;
        dct:created ?date .
  { ?maker foaf:name ?name } union { ?post sioc:has_creator ?user . ?user sioc:name ?name } .
  { ?maker foaf:img ?depiction } union { ?maker foaf:depiction ?depiction } union { ?maker foaf:thumbnail ?depiction }
}
ORDER BY DESC(?date) LIMIT 200
";

$rs = $store->query($q);

// A failed query must not break the loops below: fall back to "no rows".
$rows = (is_array($rs) && isset($rs['result']['rows']) && is_array($rs['result']['rows']))
    ? $rs['result']['rows']
    : array();

// delete duplicate posts XXX we should pick a non-random row :-)
// (keyed by post URI, so the last row seen for a post wins)
$distinct_rows = array();
foreach ($rows as $row) {
    $distinct_rows[$row['post']] = $row;
}

// Keep whatever an earlier part of the script may have put into $json,
// but never append to an undefined variable.
if (!isset($json)) {
    $json = '';
}

foreach ($distinct_rows as $row) {
    // Read the needed bindings explicitly instead of importing every result
    // column as a variable variable: that could overwrite unrelated script
    // variables and leak values from the previous row.
    $date      = isset($row['date']) ? $row['date'] : '';
    $content   = isset($row['content']) ? $row['content'] : '';
    $name      = isset($row['name']) ? $row['name'] : '';
    $depiction = isset($row['depiction']) ? $row['depiction'] : '';

    $day = date("Y-m-d", strtotime($date));

    // Prettify the plain text content
    // trying to be smart in extracting URIs,
    // based on http://tuukka.iki.fi/irclogs
    // NOTE(review): the replacement "\1\3" reproduces the whole match, so
    // this call currently leaves $content unchanged; presumably link markup
    // around \1 was intended - confirm.
    $content = preg_replace("#(http(s)?://[^\s]*[^\s,.:])(\)(,?\s|$))?#", "\\1\\3", $content);

    // Retrieve topics from content
    // should be added in the triple store directly
    // (when inserting new data ?)
    // XXX which unicode characters should be allowed in tags?
    preg_match_all("/(^|\s)#([\w\pL]+(:[\w\pL]+)?)/su", $content, $match);
    // $match stays empty when the regex fails (e.g. invalid UTF-8 input).
    $tags = isset($match[2]) ? $match[2] : array();

    $topic_list    = array();
    $location_list = array();
    $latlng        = '';

    foreach ($tags as $t) {
        $ex = explode(':', $t);
        // "#geo" / "#dbp" without a ":value" part are treated as plain topics
        // instead of reading the missing $ex[1].
        $has_value = isset($ex[1]);

        if ($ex[0] === 'geo' && $has_value) {
            $place = $ex[1];
            $location_list[] = '"' . $place . '"';

            // The tag may contain non-ASCII letters, so it must be encoded.
            $geo = 'http://ws.geonames.org/search?q=' . urlencode($place)
                 . '&maxrows=1&type=json&maxRows=1';
            $data = curl_get_content($geo);
            $d = is_string($data) ? json_decode($data) : null;

            // Use coordinates only when the lookup returned a result; a
            // failed lookup used to produce the bogus value ",".
            // As before, the last resolved geo tag of a post wins.
            if (isset($d->geonames[0]->lat, $d->geonames[0]->lng)) {
                $loc = $d->geonames[0];
                if (isset($loc->geonameId)) {
                    $url = 'http://www.geonames.org/' . $loc->geonameId;
                }
                $latlng = "{$loc->lat},{$loc->lng}";
            }

            // NOTE(review): replaces the tag with itself, so $content is
            // unchanged and $url is unused; presumably a link to $url was
            // meant to wrap the tag - confirm.
            $content = str_replace($t, "$t", $content);
        } elseif ($ex[0] === 'dbp' && $has_value) {
            $url = 'http://dbpedia.org/resource/' . $ex[1];
            // NOTE(review): same no-op replacement as in the geo branch.
            $content = str_replace($t, "$t", $content);
            $topic_list[] = "\"$t\"";
        } else {
            $topic_list[] = '"' . mb_strtolower($t, "utf-8") . '"';
        }
    }

    // Tags match [\w\pL:]+ only, so they cannot contain quotes or backslashes.
    $topics    = implode(', ', $topic_list);
    $locations = implode(', ', $location_list);

    // Escape every free-text value that ends up inside a quoted string.
    $content     = json_quote($content);
    $date_q      = json_quote($date);
    $name_q      = json_quote($name);
    $depiction_q = json_quote($depiction);
    $latlng_q    = json_quote($latlng);

    $json .= "\n{ type: \"MicroblogPost\", label: \"$date_q\", date: \"$date_q\", day: \"$day\", "
           . "content: \"$content\", name: \"$name_q\", depiction: \"$depiction_q\", "
           . "topics: [$topics], locations: [$locations], latlng: \"$latlng_q\"},";
}

echo "{\"items\" : [";
// Drop the trailing comma left behind by the last item.
echo substr($json, 0, -1);
echo '] }';
?>