1 |
matthys |
15 |
<?php |
2 |
|
|
/* This file is part of BBClone (A PHP based Web Counter on Steroids) |
3 |
|
|
* |
4 |
|
|
* CVS FILE $Id: referrer.php,v 1.45 2011/12/30 23:03:47 joku Exp $ |
5 |
|
|
* |
6 |
|
|
* Copyright (C) 2001-2012, the BBClone Team (see doc/authors.txt for details) |
7 |
|
|
* |
8 |
|
|
* This program is free software: you can redistribute it and/or modify |
9 |
|
|
* it under the terms of the GNU General Public License as published by |
10 |
|
|
* the Free Software Foundation, either version 3 of the License, or |
11 |
|
|
* (at your option) any later version. |
12 |
|
|
* |
13 |
|
|
* This program is distributed in the hope that it will be useful, |
14 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 |
|
|
* GNU General Public License for more details. |
17 |
|
|
* |
18 |
|
|
* See doc/copying.txt for details |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
////////////// |
22 |
|
|
// Referrer // |
23 |
|
|
////////////// |
24 |
|
|
|
25 |
|
|
// referer stats |
26 |
|
|
/**
 * Adds the hit count of one referrer entry to another one and deletes the
 * entry that was merged.
 *
 * Operates on the global $access['referer'] table.
 *
 * @param string $keep   key that receives the combined count
 * @param string $remove key whose count is added to $keep and which is removed afterwards
 * @return void
 */
function bbc_sum_item($keep, $remove) {
  global $access;

  // merging an entry into itself would double its count and then delete it
  if ((string) $keep === (string) $remove) return;

  if (isset($access['referer'][$remove])) {
    // a target that has not been recorded yet starts at zero instead of
    // triggering an undefined index notice
    if (!isset($access['referer'][$keep])) $access['referer'][$keep] = 0;

    $access['referer'][$keep] += $access['referer'][$remove];
  }

  unset($access['referer'][$remove]);
}
33 |
|
|
|
34 |
|
|
/**
 * Counts one visit for the given referrer in the global $access['referer'] table.
 *
 * The referrer is reduced to "host/directory/" (scheme, query string and the
 * last path component are dropped). The "www." and the non-"www." spelling of
 * the same address share a single counter, and legacy records that were stored
 * without a trailing slash are folded into that counter.
 *
 * Special cases:
 *  - the literal string "ignored" increments $access['referer']['ignored']
 *  - a referrer without anything after "://" increments
 *    $access['referer']['not_specified']
 *
 * @param string $referer referrer URL, or the marker "ignored"
 * @return void
 */
function bbc_update_referer_stat($referer) {
  global $access;

  if ($referer == "ignored") {
    if (!isset($access['referer']['ignored'])) $access['referer']['ignored'] = 1;
    else ++$access['referer']['ignored'];
    return;
  }

  // everything behind the scheme separator; empty if there is no scheme at all
  $scheme_end = strpos($referer, "://");
  $referer_light = ($scheme_end !== false) ? (string) substr($referer, $scheme_end + 3) : "";

  if (empty($referer_light)) {
    if (!isset($access['referer']['not_specified'])) $access['referer']['not_specified'] = 1;
    else ++$access['referer']['not_specified'];
    return;
  }

  // drop the query string
  if (($qm = strpos($referer_light, "?")) !== false) $referer_light = substr($referer_light, 0, $qm);

  // dirname() yields "." for a bare host (with or without trailing slash); in
  // that case we keep the host, otherwise we keep the parent directory
  $parent = dirname($referer_light);

  if (($parent == ".") || ($parent === false)) {
    if (substr($referer_light, -1) != "/") $referer_light .= "/";
  }
  else $referer_light = $parent."/";

  // build the "www.*" equivalent (or the one without "www." if we already got it)
  $dot = strpos($referer_light, ".");

  if (($dot !== false) && (substr($referer_light, 0, $dot) == "www")) {
    $result = substr($referer_light, $dot + 1);
  }
  else $result = "www.".$referer_light;

  // patterns for old records without trailing slashes
  $ref_no_slash = (string) substr($referer_light, 0, -1);
  $res_no_slash = (string) substr($result, 0, -1);

  $has_light = isset($access['referer'][$referer_light]);
  $has_result = isset($access['referer'][$result]);

  // neither recorded with "www." nor without, seems to be our 1st visit ;)
  if (!$has_light && !$has_result) {
    $access['referer'][$referer_light] = 1;
    return;
  }

  if ($has_light && $has_result) {
    // Now we got both of them, let's continue with the one we got most of
    if ($access['referer'][$referer_light] < $access['referer'][$result]) {
      $target = $result;
      bbc_sum_item($result, $referer_light);
    }
    else {
      $target = $referer_light;
      bbc_sum_item($referer_light, $result);
    }
  }
  else {
    // only one spelling is on record: keep counting on that one and add old
    // records without trailing slashes (of either spelling) to its score
    $target = $has_result ? $result : $referer_light;

    foreach (array($ref_no_slash, $res_no_slash) as $legacy) {
      if (isset($access['referer'][$legacy])) bbc_sum_item($target, $legacy);
    }
  }

  ++$access['referer'][$target];
}
94 |
|
|
|
95 |
|
|
/**
 * Marks the referrer statistics as repaired and moves the difference between
 * the total hit count and the given referrer sum into the "not_specified"
 * counter.
 *
 * Sets $access['bugs']['ref_fix'] to 1 and adds
 * ($access['stat']['totalcount'] - $refsum) to
 * $access['referer']['not_specified'].
 *
 * @param int $refsum presumably the sum of all recorded referrer counts -- confirm against the caller
 * @return void
 */
function bbc_fix_refer_stat($refsum) {
  global $access;

  $access['bugs']['ref_fix'] = 1;

  // both counters may be missing in a fresh or damaged record; treat them as 0
  // rather than reading an undefined index
  $total = isset($access['stat']['totalcount']) ? $access['stat']['totalcount'] : 0;
  $unspecified = isset($access['referer']['not_specified']) ? $access['referer']['not_specified'] : 0;

  $access['referer']['not_specified'] = $total - $refsum + $unspecified;
}
101 |
|
|
|
102 |
|
|
/**
 * Splits a query into its parts and strips unsafe characters from both ends
 * of every part.
 *
 * The first separator of $array that occurs in $query is used for splitting;
 * if none occurs (or $array is empty) the query is treated as a single part.
 * Parts shorter than two characters after cleaning are dropped.
 *
 * @param string $query string to split
 * @param array  $array candidate separators, tried in the given order
 * @return array consecutively indexed list of cleaned parts (may be empty)
 */
function bbc_get_sep($query, $array) {
  // puts the query into an array
  $pool = array($query);

  foreach ($array as $match) {
    // an empty separator can neither be searched for nor be used for splitting
    if ($match === "") continue;

    if (strpos($query, $match) !== false) {
      $pool = explode($match, $query);
      break;
    }
  }

  // unsafe decoded characters could hose our stats
  $junk = "[`\'\"<>@\^\!\?/\(\)\[\]\{\}|+*~#;,.:_\-]+";
  $result = array();

  foreach ($pool as $item) {
    $item = preg_replace("%^".$junk."%", "", $item);
    $item = preg_replace("%".$junk.'$%', "", $item);

    if (strlen($item) < 2) continue;

    $result[] = $item;
  }
  return $result;
}
123 |
|
|
|
124 |
|
|
/**
 * Looks for a search term among a list of "name=value" GET assignments.
 *
 * Every assignment is URL-decoded and compared against a list of parameter
 * names known to carry search terms. A term has to be at least two characters
 * long. A term found in a parameter starting with "q" or "s" is returned
 * immediately; terms from other known parameters are remembered, the last one
 * found wins.
 *
 * @param array $array list of GET assignments, e.g. array("hl=en", "q=foo")
 * @return string|false the raw search term, or false if none was found
 */
function bbc_get_search($array) {
  // turns variable assignments to an associative array
  $result = false;
  $query = array(
    "^as_(ep|o|e)?q=",
    "^q(_(a(ll|ny)|phrase|not)|s|t|u(ery)?)?=",
    "^s(u|2f|p\-q|earch(_?for)?|tring|zukaj)?=",
    "^k(w|e(reses|y(word)?s?))=",
    "^b(egriff|uscar?)=",
    "^w(d|ords?)?=",
    "^te(rms?|xt)=",
    "^mi?t=",
    "^heureka=",
    "^p=",
    "^r(eq)?=",
    "/search/web/",
    "^userQuery=",
    "^v[aeop]="
  );

  foreach ($array as $string) {
    $string = urldecode($string);

    // skip empty GET variables
    if (substr($string, -1) == "=") continue;

    foreach ($query as $key) {
      if (!preg_match(":$key:", $string, $matches)) continue;

      $par = $matches[0];
      $pos = strpos($string, $par);
      $term = substr($string, ($pos + strlen($par)));

      if (strlen($term) < 2) continue;

      if (($par[0] == "q") || ($par[0] == "s")) return $term;

      // preg_match() resets $matches on every call, so the hit has to be
      // stored right here; checking $matches after the loop would only ever
      // see the outcome of the very last pattern
      $result = $term;
      break;
    }
  }
  return $result;
}
169 |
|
|
|
170 |
|
|
/**
 * Extracts the search keywords from a search engine referrer and counts them
 * in the global $access['key'] table.
 *
 * A referrer is only examined if it contains one of the known search engine
 * markers. The query part is split into GET assignments, the search term is
 * picked by bbc_get_search(), optionally converted to the charset given in
 * $BBC_CUSTOM_CHARSET, and finally split into single words.
 *
 * @param string $ref referrer URL
 * @return array|false list of recorded keywords (keys of filtered words are
 *                     left out, so the list may have gaps), or false if the
 *                     referrer is no search or yields no keywords
 */
function bbc_get_keywords($ref) {
  global $BBC_CUSTOM_CHARSET, $access;

  $var_sep = array("&", "|");
  $word_sep = array( "+", " ", "/");
  $match = array(
    "ara", "busca", "pesquis", "search", "srch", "seek", "zoek", "result", "szuka", "cherch", "such", "find",
    "trouve", "trova", "pursuit", "keres", "katalogus", "alltheinternet.com", "mamma.com", "baidu.com", "heureka.hu",
    "kartoo.com", "ask.com", "aport.ru", "google", "yahoo"
  );

  $ref_lower = strtolower($ref);
  $is_search = false;

  foreach ($match as $key) {
    // if string occurs at the beginning strpos() returns integer 0, if it can't be
    // found at all, however, it returns boolean false => definition required which
    // considers 0 as true
    if (strpos($ref_lower, $key) !== false) {
      $is_search = true;
      break;
    }
  }

  if (!$is_search) return false;

  // referrers may arrive with HTML escaped ampersands
  $ref = str_replace("&amp;", "&", urldecode($ref));
  $is_query = strrpos($ref, "?");

  if ($is_query !== false) $ref = substr($ref, $is_query + 1);
  else {
    // no query string: work on everything behind the scheme, or on the whole
    // string if there is no scheme either
    $scheme_end = strpos($ref, "://");

    if ($scheme_end !== false) $ref = substr($ref, $scheme_end + 3);
  }

  $get_vars = bbc_get_sep($ref, $var_sep);
  $raw_search = bbc_get_search($get_vars);

  if ($raw_search === false) return false;

  // Conversion of keywords, if applicable
  // (bbc_get_encoding() and bbc_convert_lang() are defined outside this file)
  $from = extension_loaded("mbstring") ? bbc_get_encoding($raw_search) : false;
  $char = (!empty($BBC_CUSTOM_CHARSET)) ? $BBC_CUSTOM_CHARSET : false;

  if (($from !== false) || extension_loaded("recode")) {
    $raw_search = bbc_convert_lang($raw_search, $from, $char);
  }

  $flt_search = bbc_get_sep($raw_search, $word_sep);

  // strtolower messes up UTF-8 so we leave things case sensitive if it's
  // requested as charset
  $lowercase = (!$char || (stristr($char, "UTF") === false));

  for ($i = 0, $j = count($flt_search); $i < $j; $i++) {
    // Filter search engine cache indicator
    if ((strlen($flt_search[$i]) > 50) || (strlen($flt_search[$i]) < 2) ||
        (preg_match("#^(cache|tbn)\:[a-z0-9_\-]{8,16}\:#", $flt_search[$i]))) {
      unset($flt_search[$i]);
      continue;
    }

    // bbc_clean() is defined outside this file
    $word = bbc_clean($flt_search[$i]);
    $flt_search[$i] = $lowercase ? strtolower($word) : $word;

    if (!isset($access['key'][($flt_search[$i])])) $access['key'][($flt_search[$i])] = 1;
    else ++$access['key'][($flt_search[$i])];
  }
  return (!empty($flt_search) ? $flt_search : false);
}
225 |
|
|
?> |