ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/BBClone/branches/0.6.1-RC1/lib/referrer.php
Revision: 31
Committed: Wed Nov 27 17:23:11 2013 UTC (11 years ago) by matthys
File size: 7912 byte(s)
Log Message:
0.6.1-Release Candidate 1

File Contents

# Content
1 <?php
2 /* This file is part of BBClone (A PHP based Web Counter on Steroids)
3 *
4 * SVN FILE $Id$
5 *
6 * Copyright (C) 2001-2013, the BBClone Team (see doc/authors.txt for details)
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * See doc/copying.txt for details
19 */
20
21 //////////////
22 // Referrer //
23 //////////////
24
25 // referer stats
// Merges two referrer counters: the hit count stored under $remove is added
// to the one stored under $keep, and the $remove entry is deleted afterwards.
// Both entries live in the global $access['referer'] table.
function bbc_sum_item($keep, $remove) {
  global $access;

  $moved_hits = $access['referer'][$remove];

  $access['referer'][$keep] += $moved_hits;

  // the source entry is gone once its hits have been transferred
  unset($access['referer'][$remove]);
}
33
// Counts one hit for $referer in the global $access['referer'] table.
//
// $referer is either the literal string "ignored" (counted under the key
// 'ignored') or a referrer URL. A URL without anything after "://" is counted
// under 'not_specified'. Otherwise the URL is reduced to a key with the scheme
// and query string removed and a trailing slash added (a final path segment is
// dropped via dirname()), and the hit is booked on that key or on its
// "www." / non-"www." twin, whichever is already recorded.
function bbc_update_referer_stat($referer) {
  global $access;

  if ($referer == "ignored") {
    if (!isset($access['referer']['ignored'])) $access['referer']['ignored'] = 1;
    else ++$access['referer']['ignored'];
    return;
  }

  // everything behind the scheme separator; without "://" nothing usable is left
  $scheme_pos = strpos($referer, "://");
  $referer_light = ($scheme_pos === false) ? "" : substr($referer, $scheme_pos + 3);

  if (empty($referer_light)) {
    if (!isset($access['referer']['not_specified'])) $access['referer']['not_specified'] = 1;
    else ++$access['referer']['not_specified'];
    return;
  }

  // drop the query string
  if (($qm = strpos($referer_light, "?")) !== false) $referer_light = substr($referer_light, 0, $qm);

  // normalise to "<parent>/": dirname() yields "." when there is no parent part
  if ((($parent = dirname($referer_light)) == ".") || ($parent === false)) {
    $referer_light = (substr($referer_light, -1) == "/") ? $referer_light : $referer_light."/";
  }
  else $referer_light = $parent."/";

  // $result is the "www." twin of $referer_light (prefix added or stripped).
  // Testing the literal "www." prefix equals comparing the text in front of the
  // first dot with "www", but does not break when the key contains no dot.
  $has_www = (strncmp($referer_light, "www.", 4) === 0);
  $result = $has_www ? substr($referer_light, 4) : "www.".$referer_light;

  // patterns for old records without trailing slashes
  $ref_no_slash = substr($referer_light, 0, -1);
  $res_no_slash = substr($result, 0, -1);

  $has_ref = isset($access['referer'][$referer_light]);
  $has_res = isset($access['referer'][$result]);

  // neither recorded with "www." nor without: first visit from there
  if (!$has_ref && !$has_res) {
    $access['referer'][$referer_light] = 1;
    return;
  }

  if ($has_ref && $has_res) {
    // both twins exist: merge into the one with more hits ($referer_light wins a tie)
    if ($access['referer'][$referer_light] < $access['referer'][$result]) {
      $keep = $result;
      $drop = $referer_light;
    }
    else {
      $keep = $referer_light;
      $drop = $result;
    }
    bbc_sum_item($keep, $drop);
  }
  else {
    // exactly one twin exists: fold old records without trailing slash into it.
    // Both legacy spellings are checked; previously only the "www." twin's
    // legacy key was looked at and $ref_no_slash was never used.
    $keep = $has_ref ? $referer_light : $result;

    foreach (array($ref_no_slash, $res_no_slash) as $legacy) {
      if (($legacy !== $keep) && isset($access['referer'][$legacy])) bbc_sum_item($keep, $legacy);
    }
  }

  ++$access['referer'][$keep];
}
94
// Raises the 'not_specified' referrer counter by the difference between the
// total hit count ($access['stat']['totalcount']) and $refsum, and sets the
// $access['bugs']['ref_fix'] flag to 1.
function bbc_fix_refer_stat($refsum) {
  global $access;

  // hits that are in the total but not in the sum handed in by the caller
  $unaccounted = $access['stat']['totalcount'] - $refsum;

  $access['bugs']['ref_fix'] = 1;
  $access['referer']['not_specified'] = $unaccounted + $access['referer']['not_specified'];
}
101
// Splits $query into a list of cleaned pieces.
//
// $array is a list of candidate separators in order of preference; the first
// one that occurs in $query is used for splitting. If none occurs (or the list
// is empty) the whole query is treated as a single piece. Every piece gets
// leading and trailing punctuation stripped, and pieces shorter than two
// characters are dropped. Returns a consecutively indexed array, possibly empty.
function bbc_get_sep($query, $array) {
  // without a usable separator the query itself is the only piece
  $pool = array($query);

  foreach ($array as $match) {
    // an empty separator can never split anything (and explode() rejects it)
    if ($match === "") continue;

    if (strpos($query, $match) !== false) {
      $pool = explode($match, $query);
      break;
    }
  }

  $pieces = array();

  foreach ($pool as $piece) {
    // unsafe decoded characters could hose our stats
    $piece = preg_replace("%^[`\'\"<>@\^\!\?/\(\)\[\]\{\}|+*~#;,.:_\-]+%", "", $piece);
    $piece = preg_replace("%[`\'\"<>@\^\!\?/\(\)\[\]\{\}|+*~#;,.:_\-]+$%", "", $piece);

    if (strlen($piece) < 2) continue;

    $pieces[] = $piece;
  }
  return $pieces;
}
123
// Extracts the search term from a list of "name=value" query parts.
//
// Each element of $array is URL-decoded and tested against the known search
// parameter patterns. A term belonging to a parameter whose name starts with
// "q" or "s" is returned immediately. Terms of other recognised parameters are
// remembered as fallback (a later one replaces an earlier one) and returned
// after all parts have been examined. Terms shorter than two characters and
// parts ending in "=" are ignored. Returns false if nothing was found.
function bbc_get_search($array) {
  $result = false;
  $query = array(
    "^as_(ep|o|e)?q=",
    "^q(_(a(ll|ny)|phrase|not)|s|t|u(ery)?)?=",
    "^s(u|2f|p\-q|earch(_?for)?|tring|zukaj)?=",
    "^k(w|e(reses|y(word)?s?))=",
    "^b(egriff|uscar?)=",
    "^w(d|ords?)?=",
    "^te(rms?|xt)=",
    "^mi?t=",
    "^heureka=",
    "^p=",
    "^r(eq)?=",
    "/search/web/",
    "^userQuery=",
    "^v[aeop]="
  );

  foreach ($array as $string) {
    $string = urldecode($string);

    // skip empty GET variables
    if (substr($string, -1) === "=") continue;

    foreach ($query as $key) {
      if (!preg_match(":$key:", $string, $matches)) continue;

      // the term is whatever follows the matched parameter name
      $par = $matches[0];
      $term = substr($string, (strpos($string, $par) + strlen($par)));

      if (strlen($term) < 2) continue;

      if (($par[0] === "q") || ($par[0] === "s")) return $term;

      // Remember the fallback right away: preg_match() resets $matches on every
      // call, so it cannot be inspected after the loop to learn whether an
      // earlier pattern had matched.
      $result = $term;
    }
  }
  return $result;
}
169
// Extracts search keywords from the referrer URL $ref and counts each of them
// in the global $access['key'] table.
//
// Returns false when $ref contains none of the search engine markers, when no
// search term can be found in it, or when no keyword survives filtering.
// Otherwise returns the array of keywords; filtered entries are removed from
// it without reindexing.
function bbc_get_keywords($ref) {
  global $BBC_CUSTOM_CHARSET, $access;

  $var_sep = array("&", "|");
  $word_sep = array( "+", " ", "/");
  $match = array(
    "ara", "busca", "pesquis", "search", "srch", "seek", "zoek", "result", "szuka", "cherch", "such", "find",
    "trouve", "trova", "pursuit", "keres", "katalogus", "alltheinternet.com", "mamma.com", "baidu.com", "heureka.hu",
    "kartoo.com", "ask.com", "aport.ru", "google", "yahoo"
  );

  // a marker at the very start gives position 0, hence the strict test against false
  $lowered = strtolower($ref);
  $is_search = false;

  foreach ($match as $key) {
    if (strpos($lowered, $key) !== false) {
      $is_search = true;
      break;
    }
  }

  if (!$is_search) return false;

  $ref = str_replace("&amp;", "&", urldecode($ref));
  $is_query = strrpos($ref, "?");

  // keep what follows the last "?", or else what follows the scheme separator
  if ($is_query !== false) $ref = substr($ref, $is_query + 1);
  else $ref = substr($ref, (strpos($ref, "://") + 3));

  $get_vars = bbc_get_sep($ref, $var_sep);
  $raw_search = bbc_get_search($get_vars);

  if ($raw_search === false) return false;

  // Conversion of keywords, if applicable
  $from = extension_loaded("mbstring") ? bbc_get_encoding($raw_search) : false;
  $char = (!empty($BBC_CUSTOM_CHARSET)) ? $BBC_CUSTOM_CHARSET : false;

  if (($from !== false) || extension_loaded("recode")) {
    $raw_search = bbc_convert_lang($raw_search, $from, $char);
  }

  $flt_search = bbc_get_sep($raw_search, $word_sep);

  foreach (array_keys($flt_search) as $i) {
    $length = strlen($flt_search[$i]);

    // Drop overlong or too short words and search engine cache indicators
    if (($length > 50) || ($length < 2) ||
        (preg_match("#^(cache|tbn)\:[a-z0-9_\-]{8,16}\:#", $flt_search[$i]))) {
      unset($flt_search[$i]);
      continue;
    }

    // strtolower() is only applied when the custom charset is not a UTF one,
    // because it would mangle UTF-8 text
    $fold_case = (!$char || (stristr($char, "UTF") === false));
    $word = bbc_clean($flt_search[$i]);

    if ($fold_case) $word = strtolower($word);

    $flt_search[$i] = $word;

    if (isset($access['key'][$word])) ++$access['key'][$word];
    else $access['key'][$word] = 1;
  }

  if (empty($flt_search)) return false;

  return $flt_search;
}
225 ?>

Properties

Name Value
svn:keywords Id