ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/BBClone/branches/0.6.1-RC1/lib/referrer.php
Revision: 31
Committed: Wed Nov 27 17:23:11 2013 UTC (11 years ago) by matthys
File size: 7912 byte(s)
Log Message:
0.6.1-Release Candidate 1

File Contents

# User Rev Content
1 matthys 15 <?php
2     /* This file is part of BBClone (A PHP based Web Counter on Steroids)
3     *
4 matthys 16 * SVN FILE $Id$
5 matthys 15 *
6 matthys 16 * Copyright (C) 2001-2013, the BBClone Team (see doc/authors.txt for details)
7 matthys 15 *
8     * This program is free software: you can redistribute it and/or modify
9     * it under the terms of the GNU General Public License as published by
10     * the Free Software Foundation, either version 3 of the License, or
11     * (at your option) any later version.
12     *
13     * This program is distributed in the hope that it will be useful,
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     * GNU General Public License for more details.
17     *
18     * See doc/copying.txt for details
19     */
20    
21     //////////////
22     // Referrer //
23     //////////////
24    
25     // referer stats
// Folds the hit counter stored under $remove into the one stored under $keep
// and deletes the $remove record afterwards. Both are keys of the global
// $access['referer'] map.
//
// $keep   - key of the record that receives the hits
// $remove - key of the record that is merged and then dropped
function bbc_sum_item($keep, $remove) {
    global $access;

    // Merging a record into itself would double it and then delete it,
    // losing the counter altogether
    if ((string) $keep === (string) $remove) return;

    // A missing record counts as zero hits instead of raising a notice
    $kept = isset($access['referer'][$keep]) ? $access['referer'][$keep] : 0;
    $removed = isset($access['referer'][$remove]) ? $access['referer'][$remove] : 0;

    $access['referer'][$keep] = $kept + $removed;

    unset($access['referer'][$remove]);
}
33    
// Records one hit for $referer in the global $access['referer'] map.
//
// The literal value "ignored" bumps the 'ignored' counter. A referrer without
// a "scheme://" part bumps 'not_specified'. Everything else is reduced to
// "host/dir/" (query string and file name stripped) and counted under that
// key. The "www." and non-"www." spellings of the same location are treated
// as one record, and old records stored without a trailing slash are folded
// into the record that gets counted.
//
// $referer - raw referrer string, or "ignored"
function bbc_update_referer_stat($referer) {
    global $access;

    if ($referer == "ignored") {
        $access['referer']['ignored'] = !isset($access['referer']['ignored']) ? 1 : ++$access['referer']['ignored'];
        return;
    }

    // Everything after "scheme://"; empty when there is no scheme at all
    $scheme_end = strpos($referer, "://");
    $referer_light = ($scheme_end === false) ? "" : (string) substr($referer, $scheme_end + 3);

    if (empty($referer_light)) {
        $access['referer']['not_specified'] = !isset($access['referer']['not_specified']) ? 1 :
                                              ++$access['referer']['not_specified'];
        return;
    }

    // Drop the query string, then reduce to the containing directory
    if (($qm = strpos($referer_light, "?")) !== false) $referer_light = substr($referer_light, 0, $qm);

    $parent = dirname($referer_light);

    if ($parent === ".") {
        // bare host: just make sure it ends with a slash
        if (substr($referer_light, -1) != "/") $referer_light .= "/";
    }
    else $referer_light = $parent."/";

    // Build the "www.*" equivalent (or the one without, if we already got "www.")
    $dot = strpos($referer_light, ".");

    if (($dot !== false) && (substr($referer_light, 0, $dot) === "www")) {
        $result = (string) substr($referer_light, $dot + 1);
    }
    else $result = "www.".$referer_light;

    // patterns for old records without trailing slashes
    $ref_no_slash = (string) substr($referer_light, 0, -1);
    $res_no_slash = (string) substr($result, 0, -1);

    $has_ref = isset($access['referer'][$referer_light]);
    $has_res = isset($access['referer'][$result]);

    // neither recorded with "www." nor without, seems to be our 1st visit ;)
    if (!$has_ref && !$has_res) {
        $access['referer'][$referer_light] = 1;
        return;
    }

    if ($has_ref && $has_res) {
        // We got both of them, let's continue with the one we got most of
        if ($access['referer'][$referer_light] < $access['referer'][$result]) {
            $keep = $result;
            $drop = $referer_light;
        }
        else {
            $keep = $referer_light;
            $drop = $result;
        }
        bbc_sum_item($keep, $drop);
    }
    else {
        $keep = $has_res ? $result : $referer_light;

        // Old records without trailing slash are added to our score too. Both
        // spellings are checked. The special counters must never be folded
        // into a site record: a referrer like "http://www.not_specified/"
        // would otherwise swallow and delete the 'not_specified' counter.
        $reserved = array("ignored", "not_specified");

        foreach (array($ref_no_slash, $res_no_slash) as $legacy) {
            if (($legacy === $keep) || in_array($legacy, $reserved, true)) continue;
            if (isset($access['referer'][$legacy])) bbc_sum_item($keep, $legacy);
        }
    }

    ++$access['referer'][$keep];
}
94    
// Sets the $access['bugs']['ref_fix'] flag and adds the difference between
// the total hit count and $refsum to the 'not_specified' referrer counter.
//
// $refsum - number subtracted from $access['stat']['totalcount']
//           (presumably the sum of all recorded referrer hits; verify
//           against the caller)
function bbc_fix_refer_stat($refsum) {
    global $access;

    $access['bugs']['ref_fix'] = 1;

    // The counter may not exist yet; treat that as zero instead of reading
    // an undefined index
    $not_specified = isset($access['referer']['not_specified']) ? $access['referer']['not_specified'] : 0;

    $access['referer']['not_specified'] = $access['stat']['totalcount'] - $refsum + $not_specified;
}
101    
// Puts the query into an array.
//
// The query is split on the first separator of $array (in list order) that
// occurs in it; if none occurs, or the list is empty, the query is kept as a
// single piece. Punctuation is stripped from both ends of every piece, and
// pieces shorter than two characters are dropped.
//
// $query - string to split
// $array - list of candidate separator strings
//
// Returns a sequentially indexed array of the remaining pieces.
function bbc_get_sep($query, $array) {
    // unsafe decoded characters could hose our stats
    $junk = "[`\'\"<>@\^\!\?/\(\)\[\]\{\}|+*~#;,.:_\-]+";

    // Default covers "no separator found" as well as an empty separator list
    $pool = array($query);

    foreach ($array as $match) {
        if (strpos($query, $match) !== false) {
            $pool = explode($match, $query);
            break;
        }
    }

    $pieces = array();

    foreach ($pool as $piece) {
        $piece = preg_replace("%^".$junk."%", "", $piece);
        $piece = preg_replace("%".$junk.'$%', "", $piece);

        if (strlen($piece) < 2) continue;

        $pieces[] = $piece;
    }
    return $pieces;
}
123    
// Extracts the search term from a list of "name=value" assignments.
//
// Every entry is URL-decoded and tested against the known search parameter
// patterns. A match on a parameter starting with "q" or "s" is returned right
// away. Any other match is remembered as a fallback, with later entries
// overriding earlier ones. Terms shorter than two characters and entries
// ending in "=" are ignored.
//
// $array - list of raw GET variable assignments
//
// Returns the search term, or false if none was found.
function bbc_get_search($array) {
    $result = false;
    $query = array(
        "^as_(ep|o|e)?q=",
        "^q(_(a(ll|ny)|phrase|not)|s|t|u(ery)?)?=",
        "^s(u|2f|p\-q|earch(_?for)?|tring|zukaj)?=",
        "^k(w|e(reses|y(word)?s?))=",
        "^b(egriff|uscar?)=",
        "^w(d|ords?)?=",
        "^te(rms?|xt)=",
        "^mi?t=",
        "^heureka=",
        "^p=",
        "^r(eq)?=",
        "/search/web/",
        "^userQuery=",
        "^v[aeop]="
    );

    foreach ($array as $string) {
        $string = urldecode($string);

        // skip empty GET variables
        if (substr($string, -1) == "=") continue;

        // preg_match() overwrites its match array on every call, so the hit
        // has to be remembered separately; otherwise only the last pattern
        // of the list could ever produce a fallback result
        $found = false;

        foreach ($query as $key) {
            if (!preg_match(":$key:", $string, $matches)) continue;

            $par = $matches[0];
            $term = substr($string, (strpos($string, $par) + strlen($par)));

            if (strlen($term) < 2) continue;

            if (($par[0] == "q") || ($par[0] == "s")) return $term;

            $found = $term;
        }
        if ($found !== false) $result = $found;
    }
    return $result;
}
169    
// Extracts the search keywords from a referrer and counts each of them in the
// global $access['key'] map.
//
// $ref - referrer URL
//
// Returns the array of counted keywords, or false when the referrer does not
// contain any of the known search engine hints or carries no usable term.
function bbc_get_keywords($ref) {
    global $BBC_CUSTOM_CHARSET, $access;

    $engine_hints = array(
        "ara", "busca", "pesquis", "search", "srch", "seek", "zoek", "result", "szuka", "cherch", "such", "find",
        "trouve", "trova", "pursuit", "keres", "katalogus", "alltheinternet.com", "mamma.com", "baidu.com", "heureka.hu",
        "kartoo.com", "ask.com", "aport.ru", "google", "yahoo"
    );

    // strpos() yields integer 0 for a hit at the very beginning and boolean
    // false for no hit at all, hence the strict comparison
    $haystack = strtolower($ref);
    $looks_like_search = false;

    foreach ($engine_hints as $hint) {
        if (strpos($haystack, $hint) !== false) {
            $looks_like_search = true;
            break;
        }
    }

    if (!$looks_like_search) return false;

    // Keep only the query string (or, lacking one, everything after the scheme)
    $ref = str_replace("&amp;", "&", urldecode($ref));
    $query_pos = strrpos($ref, "?");

    if ($query_pos !== false) $ref = substr($ref, $query_pos + 1);
    else $ref = substr($ref, (strpos($ref, "://") + 3));

    $raw_search = bbc_get_search(bbc_get_sep($ref, array("&", "|")));

    if ($raw_search === false) return false;

    // Conversion of keywords, if applicable
    $from = extension_loaded("mbstring") ? bbc_get_encoding($raw_search) : false;
    $char = (!empty($BBC_CUSTOM_CHARSET)) ? $BBC_CUSTOM_CHARSET : false;

    if (($from !== false) || extension_loaded("recode")) {
        $raw_search = bbc_convert_lang($raw_search, $from, $char);
    }

    $flt_search = bbc_get_sep($raw_search, array("+", " ", "/"));

    // strtolower messes up UTF-8 so things stay case sensitive if that is
    // the requested charset
    $keep_case = ($char && (stristr($char, "UTF") !== false));

    foreach ($flt_search as $idx => $word) {
        $length = strlen($word);

        // Filter over- and undersized words and search engine cache indicators
        if (($length > 50) || ($length < 2) || (preg_match("#^(cache|tbn)\:[a-z0-9_\-]{8,16}\:#", $word))) {
            unset($flt_search[$idx]);
            continue;
        }

        $word = $keep_case ? bbc_clean($word) : strtolower(bbc_clean($word));
        $flt_search[$idx] = $word;

        if (isset($access['key'][$word])) ++$access['key'][$word];
        else $access['key'][$word] = 1;
    }

    if (empty($flt_search)) return false;

    return $flt_search;
}
225     ?>

Properties

Name Value
svn:keywords Id