2011-08-24 22:16:44 +02:00
|
|
|
<?php
|
|
|
|
|
2011-10-08 19:42:09 +02:00
|
|
|
// Source: http://www.php.net/manual/de/function.curl-setopt.php#102121
|
|
|
|
// This works around a safe_mode/open_basedir restriction
|
|
|
|
/**
 * Execute a cURL request and follow HTTP redirects, even when PHP refuses
 * to enable CURLOPT_FOLLOWLOCATION (safe_mode or open_basedir in effect).
 *
 * When native redirect handling is allowed, it is simply switched on.
 * Otherwise a copy of the handle issues header-only requests to walk the
 * redirect chain by hand, and the final URL is then set on $ch.
 *
 * @param resource $ch          Initialised cURL handle with its URL already set.
 * @param int|null $maxredirect Maximum number of redirects to follow; null
 *                              means the default of 5. Passed by reference:
 *                              a non-null value is set to 0 when the manual
 *                              redirect walk exhausts the limit.
 * @return mixed The result of curl_exec($ch), or false when the manual
 *               redirect walk exhausts the redirect limit.
 */
function curl_exec_follow(/*resource*/ $ch, /*int*/ &$maxredirect = null) {
	$mr = $maxredirect === null ? 5 : intval($maxredirect);

	if ($mr > 0) {
		// Set the user agent before the handle may be copied below, so the
		// redirect probes and the final request identify themselves alike.
		curl_setopt($ch, CURLOPT_USERAGENT, "Owncloud Bookmark Crawl");
	}

	// ini_get() yields '', '0', '1', 'On', 'Off' ... depending on how the
	// directive was set, or false when the directive does not exist;
	// FILTER_VALIDATE_BOOLEAN maps every "disabled" spelling to false.
	$safeMode = filter_var(ini_get('safe_mode'), FILTER_VALIDATE_BOOLEAN);

	if (ini_get('open_basedir') == '' && !$safeMode) {
		// No restriction in place: let libcurl follow the redirects itself.
		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0);
		curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
	} else {
		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
		if ($mr > 0) {
			$newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

			// Probe with a copy so the caller's handle keeps its options.
			$rch = curl_copy_handle($ch);
			curl_setopt($rch, CURLOPT_HEADER, true);
			curl_setopt($rch, CURLOPT_NOBODY, true);
			curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
			curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);

			$redirectCodes = array(301, 302, 303, 307, 308);
			do {
				curl_setopt($rch, CURLOPT_URL, $newurl);
				$header = curl_exec($rch);

				// $code stays 0 unless this response is a usable redirect;
				// 0 ends the loop and $newurl is taken as the final URL.
				$code = 0;
				if ($header !== false && !curl_errno($rch)) {
					$status = (int)curl_getinfo($rch, CURLINFO_HTTP_CODE);
					// Header names are case-insensitive, so match with /i.
					// NOTE(review): a relative Location value is used as-is
					// and is not resolved against the current URL.
					if (in_array($status, $redirectCodes, true)
						&& preg_match('/^Location:[ \t]*([^\r\n]+)/mi', $header, $matches) === 1
						&& trim($matches[1]) !== ''
					) {
						$newurl = trim($matches[1]);
						$code = $status;
					}
				}
			} while ($code && --$mr);
			curl_close($rch);

			if (!$mr) {
				// The redirect budget ran out while still being redirected.
				if ($maxredirect === null) {
					OCP\Util::writeLog(
						'bookmark',
						'Too many redirects. When following redirects, libcurl hit the maximum amount on bookmark',
						OCP\Util::ERROR);
				} else {
					$maxredirect = 0;
				}
				return false;
			}
			curl_setopt($ch, CURLOPT_URL, $newurl);
		}
	}
	return curl_exec($ch);
}
|
|
|
|
|
2011-08-24 22:16:44 +02:00
|
|
|
/**
 * Collect basic metadata (normalised URL and page title) for a bookmark URL.
 *
 * A URL without an allowed scheme gets 'http://' prepended. When the cURL
 * extension is available the page is fetched and the contents of its
 * <title> element become the 'title' entry.
 *
 * @param string $url URL as entered by the user.
 * @return array Always contains 'url'. Also contains 'title' when cURL is
 *               available; it is an empty string when the fetch failed or
 *               the page has no <title> element.
 */
function getURLMetadata($url) {
	// Allow only http(s), ftp(s) and sftp. Spelling the schemes out avoids
	// accepting nonsense such as "tp://" or "hftp://".
	$protocols = '/^(?:https?|ftps?|sftp):\/\//i';
	// If no (allowed) protocol is given, assume http.
	if (preg_match($protocols, $url) !== 1) {
		$url = 'http://' . $url;
	}

	$metadata = array();
	$metadata['url'] = $url;

	if (!function_exists('curl_init')) {
		// Without cURL the page cannot be fetched; return the URL only.
		return $metadata;
	}

	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	$page = curl_exec_follow($ch);
	curl_close($ch);

	// curl_exec_follow() returns false on failure, so check the result
	// explicitly instead of silencing warnings with @.
	$title = '';
	if (is_string($page) && preg_match("/<title>(.*)<\/title>/sUi", $page, $match) === 1) {
		$title = htmlspecialchars_decode($match[1]);
	}
	$metadata['title'] = $title;

	return $metadata;
}
|
2012-02-22 23:52:56 +01:00
|
|
|
|
2012-06-27 22:43:11 +00:00
|
|
|
/**
 * Split a comma-separated tag string into a list of cleaned-up tags.
 *
 * Each piece is trimmed of surrounding whitespace, and pieces that are
 * empty after trimming are dropped.
 *
 * @param string $line Comma-separated tags, e.g. "php, web,,curl ".
 * @return array Sequentially indexed list of non-empty, trimmed tags.
 */
function analyzeTagRequest($line) {
	$trimmed = array_map('trim', explode(',', $line));
	// 'strlen' keeps every non-empty string, including "0";
	// array_values() restores sequential keys after filtering.
	return array_values(array_filter($trimmed, 'strlen'));
}
|