<?php
/* This file is part of BBClone (A PHP based Web Counter on Steroids)
 *
 * SVN FILE $Id$
 *
 * Copyright (C) 2001-2013, the BBClone Team (see doc/authors.txt for details)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * See doc/copying.txt for details
 */

//////////////
// Referrer //
//////////////

// referer stats
/**
 * Folds one referrer counter into another and removes the folded one.
 *
 * The hit count stored in $access['referer'][$remove] is added to
 * $access['referer'][$keep]; afterwards the $remove entry is deleted.
 * A missing $keep entry is treated as 0, a missing $remove entry adds nothing.
 *
 * @param string $keep   key of the referrer record that survives
 * @param string $remove key of the referrer record that is merged into $keep
 * @return void
 */
function bbc_sum_item($keep, $remove) {
  global $access;

  // merging a record into itself would double it and then delete it
  if ((string) $keep === (string) $remove) return;

  if (isset($access['referer'][$remove])) {
    $kept = isset($access['referer'][$keep]) ? $access['referer'][$keep] : 0;
    $access['referer'][$keep] = $kept + $access['referer'][$remove];
  }

  unset($access['referer'][$remove]);
}
33 |
|
34 |
/**
 * Counts one hit for a referrer in the global $access['referer'] table.
 *
 * - The literal "ignored" is counted under the key "ignored".
 * - A referrer with nothing after "://" (or without "://") is counted under
 *   the key "not_specified".
 * - Anything else is reduced to "host/directory/" (scheme, query string and
 *   file name removed, trailing slash enforced) and counted under that key.
 *
 * A key and its "www." twin share one counter: if only one of them is on
 * record it receives the hit, if both are on record they are merged into the
 * one with more hits (ties go to the spelling of the current referrer).
 * Records of either spelling that lack the trailing slash are folded into the
 * surviving key as well.
 *
 * @param string $referer referrer URL, or the literal "ignored"
 * @return void
 */
function bbc_update_referer_stat($referer) {
  global $access;

  if ($referer == "ignored") {
    if (isset($access['referer']['ignored'])) ++$access['referer']['ignored'];
    else $access['referer']['ignored'] = 1;
    return;
  }

  // everything behind the scheme separator, empty if there is none
  $scheme_end = strpos($referer, "://");
  $referer_light = ($scheme_end === false) ? "" : substr($referer, $scheme_end + 3);

  if (empty($referer_light)) {
    if (isset($access['referer']['not_specified'])) ++$access['referer']['not_specified'];
    else $access['referer']['not_specified'] = 1;
    return;
  }

  // strip the query string
  if (($qm = strpos($referer_light, "?")) !== false) $referer_light = substr($referer_light, 0, $qm);

  // keep the directory part only and make sure it ends with a slash
  $parent = dirname($referer_light);

  if ($parent == ".") {
    if (substr($referer_light, -1) != "/") $referer_light .= "/";
  }
  else $referer_light = $parent."/";

  // compare whether we got a "www.*" equivalent recorded (or missing);
  // a referrer without any dot simply has no "www" prefix
  $dot = strpos($referer_light, ".");
  $prefix = ($dot === false) ? "" : substr($referer_light, 0, $dot);
  $result = ($prefix != "www") ? "www.".$referer_light : substr($referer_light, $dot + 1);

  $has_ref = isset($access['referer'][$referer_light]);
  $has_res = isset($access['referer'][$result]);

  // neither recorded with "www." nor without, seems to be our 1st visit ;)
  if (!$has_ref && !$has_res) {
    $access['referer'][$referer_light] = 1;
    return;
  }

  if ($has_ref && $has_res) {
    // we got both of them, let's continue with the one we got most of
    if ($access['referer'][$referer_light] < $access['referer'][$result]) {
      $keep = $result;
      $drop = $referer_light;
    }
    else {
      $keep = $referer_light;
      $drop = $result;
    }
    bbc_sum_item($keep, $drop);
  }
  else {
    $keep = $has_res ? $result : $referer_light;

    // old records without trailing slashes are added to our score too; both
    // spellings are checked, the special counters are never touched
    $legacy_keys = array(substr($referer_light, 0, -1), substr($result, 0, -1));

    foreach ($legacy_keys as $legacy) {
      if (($legacy === "ignored") || ($legacy === "not_specified")) continue;
      if (isset($access['referer'][$legacy])) bbc_sum_item($keep, $legacy);
    }
  }

  ++$access['referer'][$keep];
}
94 |
|
95 |
/**
 * Adjusts the "not_specified" referrer counter and flags the fix as applied.
 *
 * Sets $access['bugs']['ref_fix'] to 1 and adds the difference between
 * $access['stat']['totalcount'] and $refsum to
 * $access['referer']['not_specified'] (a missing counter counts as 0).
 *
 * @param int $refsum number subtracted from the total count; presumably the
 *                    sum of all recorded referrer hits - confirm with caller
 * @return void
 */
function bbc_fix_refer_stat($refsum) {
  global $access;

  $access['bugs']['ref_fix'] = 1;

  // the counter only exists once a referrer-less hit has been recorded
  $unspecified = isset($access['referer']['not_specified']) ? $access['referer']['not_specified'] : 0;

  $access['referer']['not_specified'] = $access['stat']['totalcount'] - $refsum + $unspecified;
}
101 |
|
102 |
/**
 * Splits a string into cleaned-up pieces.
 *
 * The string is split on the first separator of $array (in list order) that
 * occurs in it; if none occurs, or the list is empty, the whole string is the
 * only piece. Each piece then has leading and trailing punctuation removed,
 * and pieces shorter than two characters are dropped.
 *
 * @param string $query string to split
 * @param array  $array candidate separators, tried in order
 * @return array consecutively indexed list of the remaining pieces
 */
function bbc_get_sep($query, $array) {
  $pool = array($query);

  // the first separator that actually occurs in the query wins
  foreach ($array as $match) {
    // an empty separator cannot be used to split
    if ($match === "") continue;

    if (strpos($query, $match) !== false) {
      $pool = explode($match, $query);
      break;
    }
  }

  $pieces = array();

  foreach ($pool as $item) {
    // unsafe decoded characters could hose our stats
    $item = preg_replace("%^[`\'\"<>@\^\!\?/\(\)\[\]\{\}|+*~#;,.:_\-]+%", "", $item);
    $item = preg_replace("%[`\'\"<>@\^\!\?/\(\)\[\]\{\}|+*~#;,.:_\-]+$%", "", $item);

    if (strlen((string) $item) >= 2) $pieces[] = $item;
  }
  return $pieces;
}
123 |
|
124 |
/**
 * Picks the search term out of a list of "name=value" query variables.
 *
 * Each entry is URL-decoded and tested against the known search parameter
 * patterns. A usable term is whatever follows the matched parameter name and
 * is at least two characters long. A term whose parameter starts with "q" or
 * "s" is returned immediately; terms found through any other pattern are
 * remembered and the last one found is returned once all entries were checked.
 *
 * @param array $array query variables, e.g. array("hl=en", "q=foo+bar")
 * @return string|false the search term, or false if none was found
 */
function bbc_get_search($array) {
  $result = false;
  $query = array(
    "^as_(ep|o|e)?q=",
    "^q(_(a(ll|ny)|phrase|not)|s|t|u(ery)?)?=",
    "^s(u|2f|p\-q|earch(_?for)?|tring|zukaj)?=",
    "^k(w|e(reses|y(word)?s?))=",
    "^b(egriff|uscar?)=",
    "^w(d|ords?)?=",
    "^te(rms?|xt)=",
    "^mi?t=",
    "^heureka=",
    "^p=",
    "^r(eq)?=",
    "/search/web/",
    "^userQuery=",
    "^v[aeop]="
  );

  foreach ($array as $string) {
    $string = urldecode($string);

    // skip empty GET variables
    if (substr($string, -1) == "=") continue;

    foreach ($query as $key) {
      if (!preg_match(":$key:", $string, $matches)) continue;

      $par = $matches[0];
      $term = substr($string, (strpos($string, $par) + strlen($par)));

      if (strlen((string) $term) < 2) continue;

      if (($par[0] == "q") || ($par[0] == "s")) return $term;

      // preg_match() resets $matches on every call, so the hit has to be
      // remembered right here instead of being tested after the loop
      $result = $term;
    }
  }
  return $result;
}
169 |
|
170 |
/**
 * Extracts search keywords from a referrer URL and counts them.
 *
 * Returns false right away unless the lower-cased referrer contains one of the
 * known search engine markers. Otherwise the referrer is URL-decoded, its
 * query string (or, lacking one, everything behind "://") is split into
 * variables with bbc_get_sep(), and bbc_get_search() picks the search term.
 * The term is split into words; words longer than 50 or shorter than 2
 * characters and cache/thumbnail indicators are dropped. Every remaining word
 * is passed through bbc_clean(), lower-cased unless $BBC_CUSTOM_CHARSET names
 * a UTF charset, and its counter in $access['key'] is incremented.
 *
 * @param string $ref referrer URL
 * @return array|false the counted keywords (indexes of dropped words are
 *                     left out, the list is not re-indexed), or false if the
 *                     referrer is no search or yields no keyword
 */
function bbc_get_keywords($ref) {
  global $BBC_CUSTOM_CHARSET, $access;

  $var_sep = array("&", "|");
  $word_sep = array( "+", " ", "/");
  $match = array(
    "ara", "busca", "pesquis", "search", "srch", "seek", "zoek", "result", "szuka", "cherch", "such", "find",
    "trouve", "trova", "pursuit", "keres", "katalogus", "alltheinternet.com", "mamma.com", "baidu.com", "heureka.hu",
    "kartoo.com", "ask.com", "aport.ru", "google", "yahoo"
  );

  $is_search = false;
  $ref_lower = strtolower($ref);

  foreach ($match as $key) {
    // strpos() returns integer 0 for a hit at the very beginning and boolean
    // false for no hit at all, hence the strict comparison
    if (strpos($ref_lower, $key) !== false) {
      $is_search = true;
      break;
    }
  }

  if (!$is_search) return false;

  // referrers may carry HTML-escaped variable separators
  $ref = str_replace("&amp;", "&", urldecode($ref));
  $is_query = strrpos($ref, "?");

  if ($is_query !== false) $ref = substr($ref, $is_query + 1);
  // without a query string the search may be part of the path; only cut the
  // scheme off if there actually is one
  elseif (($scheme_end = strpos($ref, "://")) !== false) $ref = substr($ref, $scheme_end + 3);

  $get_vars = bbc_get_sep($ref, $var_sep);
  $raw_search = bbc_get_search($get_vars);

  if ($raw_search === false) return false;

  // Conversion of keywords, if applicable
  // NOTE(review): bbc_get_encoding() and bbc_convert_lang() are defined
  // elsewhere; presumably they detect and convert the charset - confirm
  $from = extension_loaded("mbstring") ? bbc_get_encoding($raw_search) : false;
  $char = (!empty($BBC_CUSTOM_CHARSET)) ? $BBC_CUSTOM_CHARSET : false;

  if (($from !== false) || extension_loaded("recode")) {
    $raw_search = bbc_convert_lang($raw_search, $from, $char);
  }

  $flt_search = bbc_get_sep($raw_search, $word_sep);
  // strtolower messes up UTF-8 so we leave things case sensitive if it's
  // requested as charset
  $fold_case = (!$char || (stristr($char, "UTF") === false));

  for ($i = 0, $j = count($flt_search); $i < $j; $i++) {
    // Filter search engine cache indicator
    if ((strlen($flt_search[$i]) > 50) || (strlen($flt_search[$i]) < 2) ||
        (preg_match("#^(cache|tbn)\:[a-z0-9_\-]{8,16}\:#", $flt_search[$i]))) {
      unset($flt_search[$i]);
      continue;
    }

    $word = bbc_clean($flt_search[$i]);
    $flt_search[$i] = $fold_case ? strtolower($word) : $word;

    if (isset($access['key'][($flt_search[$i])])) ++$access['key'][($flt_search[$i])];
    else $access['key'][($flt_search[$i])] = 1;
  }
  return (!empty($flt_search) ? $flt_search : false);
}
225 |
?> |