ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/BBClone/branches/0.6.2-Cust/log_processor.php
Revision: 193
Committed: Tue Dec 31 13:01:33 2013 UTC (10 years, 10 months ago) by matthys
File size: 12280 byte(s)
Log Message:
Updated bbc_update_host_stat for IPv6 and double TLD names

File Contents

# Content
1 <?php
2 /* This file is part of BBClone (A PHP based Web Counter on Steroids)
3 *
4 * SVN FILE $Id$
5 *
6 * Copyright (C) 2001-2014, the BBClone Team (see doc/authors.txt for details)
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * See doc/copying.txt for details
19 */
20
21 ///////////////////
22 // Log Processes //
23 ///////////////////
24
25 // Checking where we came from
26 if (!defined("_MARK_PAGE")) return;
27
// Comparison callback used by usort(): orders two rows by their 'time' entry, oldest first.
// Returns 1 when $row_a is the later one, -1 when it is the earlier one and 0 when both
// times are equal.
function bbc_sort_time_sc($row_a, $row_b) {
  $first = $row_a['time'];
  $second = $row_b['time'];

  if ($first > $second) return 1;
  if ($first == $second) return 0;

  return -1;
}
33
// Return the key under which the value $str is stored in $array.
// Values are compared loosely (==) and the first match wins.
// Note: 0 is returned when nothing matches, so a caller that has to tell "not found" apart
// from key 0 must check for membership itself before calling.
function bbc_get_key($array, $str) {
  // foreach instead of the former reset()/each() loop: each() no longer exists in PHP 8
  foreach ($array as $idx => $val) {
    if ($val == $str) return $idx;
  }
  return 0;
}
43
// Purge host, key and referrer stats at request: every entry of $access['host'],
// $access['key'] and $access['referer'] whose score is 1 is removed. Each removed referrer
// is added to the $access['referer']['not_specified'] counter instead.
function bbc_purge_single() {
  global $access;

  foreach (array("host", "key", "referer") as $cat) {
    // nothing has been recorded for this category
    if ((empty($access[$cat])) || (!is_array($access[$cat]))) continue;

    // Walk a snapshot, because the live array is modified inside the loop
    // (each() is gone in PHP 8 anyway)
    $entries = $access[$cat];

    foreach ($entries as $key => $score) {
      // 'not_specified' is the counter the purged referrers are added to, it must never be
      // purged itself or the hits collected so far would get lost
      if (($cat == "referer") && ($key === "not_specified")) continue;
      if ($score != 1) continue;

      unset($access[$cat][$key]);

      if ($cat == "referer") {
        if (!isset($access[$cat]['not_specified'])) $access[$cat]['not_specified'] = 0;
        ++$access[$cat]['not_specified'];
      }
    }
  }
}
59
60 // Records the hosts that visited us most. Note that we only pick up hostnames and strip
61 // any sort of prefix, because otherwise the listing would become rather useless.
62
// NOTE(review): this block runs at file level, outside of any function. It reads $client,
// which is not assigned anywhere in the visible part of this file, and nothing visible here
// reads the $host it computes. It looks like an earlier version of the logic that now lives
// in bbc_update_host_stat() below -- confirm whether it is still needed.
//
// Only runs when bots are not ignored or the client is not flagged as a robot.
63 if ((empty($BBC_IGNORE_BOTS)) || (!isset($client['robot']))) {
// "numeric" means that the dns entry is the same as the ip, i.e. there is no hostname
64 $is_num = ($client['dns'] == $client['ip']) ? 1 : 0;
// hostname: lower-cased and split into its dot separated labels
// numeric: the ip up to its last dot, followed by ".&nbsp;-"
65 $host = (!$is_num) ? explode(".", strtolower($client['dns'])) : trim(substr($client['ip'], 0,
66 strrpos($client['ip'], "."))).".&nbsp;-";
// number of labels of the hostname, 0 for numeric hosts
67 $parts = (!$is_num) ? count($host) : 0;
68 // these ones can have less than 3 characters as hostname
69 $glob = array("org", "com", "edu", "mil", "net", "gov", "int");
70
71 if ($parts > 2) {
// Keep the last three labels if the last label is not listed in $glob and the label before
// it is either shorter than 3 characters or listed in $glob; else keep the last two labels
72 $host = ((!in_array(trim($host[($parts - 1)]), $glob)) && (((strlen(trim($host[($parts - 2)])) < 3)) ||
73 (in_array($host[($parts - 2)], $glob)))) ?
74 trim(implode(".", array_splice($host, -3))) : trim(implode(".", array_splice($host, -2)));
75 }
// one or two labels are joined again unchanged, a numeric host is left as it is
76 else $host = (!$is_num) ? trim(implode(".", $host)) : $host;
77 }
78
// Count the visiting host in $access['host'].
//
// $client is a connection record with the entries 'ip' and 'dns' (and 'robot' if the client
// has been flagged as a robot). Hosts are grouped under a shortened key:
// - no hostname (dns equals ip), IPv4: the first three octets, e.g. "192.0.2.&nbsp;-"
// - no hostname, IPv6: the first four 16 bit groups of the expanded address,
//   e.g. "2001:db8:0:0:&nbsp;-"
// - hostname: the last three labels in lower case, or the last four if the name ends with
//   one of the comma separated entries of $BBC_DOUBLE_TLD (e.g. "co.uk"); a leading
//   "www." or "www<digits>." is removed
// Robots are not counted if $BBC_IGNORE_BOTS is set. $access['host']['not_specified'] is
// removed in any case.
function bbc_update_host_stat($client) {
  global $access, $BBC_IGNORE_BOTS, $BBC_DOUBLE_TLD;

  if ((empty($BBC_IGNORE_BOTS)) || (!isset($client['robot']))) {
    if ($client['dns'] == $client['ip']) {
      // Numeric
      if (strpos($client['ip'], ":") === false) {
        // IPv4
        $host = trim(substr($client['ip'], 0, strrpos($client['ip'], "."))).".&nbsp;-";
      }
      else {
        // IPv6: expand the address first, else a compressed notation like "2001:db8::1"
        // would be cut at the wrong place and hosts of one network would not be grouped
        $packed = (filter_var($client['ip'], FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false) ?
                  inet_pton($client['ip']) : false;

        if (($packed !== false) && (strlen($packed) == 16)) {
          $groups = array();

          foreach (str_split(bin2hex(substr($packed, 0, 8)), 4) as $group) {
            $group = ltrim($group, "0");
            $groups[] = ($group === "") ? "0" : $group;
          }
        }
        // not a plain IPv6 address (e.g. with zone id): cut the text after four groups
        else $groups = array_slice(explode(":", $client['ip']), 0, 4);

        $host = implode(":", $groups).":&nbsp;-";
      }
    }
    else {
      // DNS name
      $double_toplevels = array();

      if (!empty($BBC_DOUBLE_TLD)) {
        // tolerate blanks and upper case in the configured list
        foreach (explode(",", $BBC_DOUBLE_TLD) as $tld) $double_toplevels[] = strtolower(trim($tld));
      }
      // host names are case insensitive, so count all spellings under the same key
      $parts = explode(".", strtolower($client['dns']));
      $lastparts = implode(".", array_slice($parts, -2));
      // a double tld like co.uk needs one label more than a normal tld
      $keep = in_array($lastparts, $double_toplevels, true) ? 4 : 3;
      $host = implode(".", array_slice($parts, -$keep));
      // Remove optional www* from start
      $host = preg_replace("/^www[0-9]*\./", "", $host);
    }

    if (!isset($access['host'][$host])) $access['host'][$host] = 0;
    $access['host'][$host]++;
  }

  if (isset($access['host']['not_specified'])) unset($access['host']['not_specified']);
}
118
// The listing of the visited pages.
//
// Records a page view for the connection $last['traffic'][$nr]. Each element of its 'views'
// list is a string of the form "time|page|count". If $page is the same page as in the most
// recent element, the count of that element is increased and the 'off' counter of the
// connection is incremented, otherwise a new "$time|$page|1" element is appended. The list
// is then sorted and cut down to its last $BBC_MAXVISIBLE elements.
// Nothing but the initialisation of 'off' happens if the connection has no 'views' list.
function bbc_update_visits($time, $page, $nr) {
  global $BBC_MAXVISIBLE, $last;

  $last['traffic'][$nr]['off'] = !empty($last['traffic'][$nr]['off']) ? $last['traffic'][$nr]['off'] : 0;

  // This has to be checked before the list is touched: count() of a missing list is a
  // fatal error in PHP 8 and an empty list has no last element to parse
  if ((empty($last['traffic'][$nr]['views'])) || (!is_array($last['traffic'][$nr]['views']))) return;

  $views = $last['traffic'][$nr]['views'];
  $lv = count($views) - 1;
  $entry = $views[$lv];

  // split "time|page|count": time ends at the first bar, count starts after the last one
  // and page is what lies between the first and the second bar
  $last_time = substr($entry, 0, strpos($entry, "|"));
  $last_cnt = substr($entry, (strrpos($entry, "|") + 1));
  $last_page = substr($entry, (strpos($entry, "|") + 1));
  $last_page = substr($last_page, 0, strpos($last_page, "|"));

  if (intval($last_page) === intval($page)) {
    $views[$lv] = "$last_time|$last_page|".++$last_cnt;
    $last['traffic'][$nr]['off']++;
  }
  else $views[] = "$time|$page|1";

  sort($views);

  // drop the elements at the start of the list that exceed $BBC_MAXVISIBLE
  $excess = count($views) - $BBC_MAXVISIBLE;
  if ($excess > 0) $views = array_slice($views, $excess);

  $last['traffic'][$nr]['views'] = $views;
}
146
// The most visited pages ranking.
//
// Increases the hit count of the page named $connect['page'] in $access['page'] and stores
// $connect['uri'] with it (null if the connection carries no uri). Titles of more than 60
// characters are stored as "..." followed by their last 57 characters; an entry still
// stored under the full title is moved to the shortened one.
// The title is also registered in $last['pages']. Titles are looked up there as exact
// strings, so that numeric looking titles such as "10" and "1e1" stay apart.
// Returns $connect with 'page' replaced by the key of the title in $last['pages'] and
// without its 'uri' entry.
function bbc_update_page_stats($connect) {
  global $access, $last;

  $long_page = $connect['page'];
  $over_60 = (strlen($long_page) > 60) ? 1 : 0;
  $page = $over_60 ? "...".substr($long_page, -57) : $long_page;

  // Fix oversized page titles
  if (($over_60) && (isset($access['page'][$long_page]['count']))) {
    $access['page'][$page]['count'] = $access['page'][$long_page]['count'];
    $access['page'][$page]['uri'] = isset($access['page'][$long_page]['uri']) ?
                                    $access['page'][$long_page]['uri'] : null;
    unset($access['page'][$long_page]);
  }

  if (!isset($access['page'][$page]['count'])) $access['page'][$page]['count'] = 0;

  $access['page'][$page]['count']++;
  // the uri is optional
  $access['page'][$page]['uri'] = isset($connect['uri']) ? $connect['uri'] : null;

  $last['pages'] = ((empty($last['pages'])) || (!is_array($last['pages']))) ? array() : $last['pages'];

  // string copies of the known titles (keys are kept) for the exact lookups below
  $titles = array_map('strval', $last['pages']);

  if ($over_60) {
    // replace a full length title that has been registered before
    $old = array_search((string) $long_page, $titles, true);

    if ($old !== false) {
      $last['pages'][$old] = $page;
      $titles[$old] = (string) $page;
    }
  }

  $idx = array_search((string) $page, $titles, true);

  if ($idx === false) {
    // unknown title: append it and pick up the key it got
    $last['pages'][] = $page;
    end($last['pages']);
    $idx = key($last['pages']);
  }

  $connect['page'] = $idx;
  unset($connect['uri']);

  return $connect;
}
183
// Transfer the raw data from the main counters of var into $last.
// Any new data (more recent than $BBC_MAXTIME) is used in the global stats.
//
// Returns false if bbc_counter_to_array() delivers no new connections, else the number of
// new connections minus the robots that were skipped because of $BBC_IGNORE_BOTS == 2.
function bbc_add_new_connections_to_old() {
  global $BBC_IGNORE_AGENT, $BBC_IGNORE_BOTS, $BBC_MAXTIME, $BBC_MAXVISIBLE, $BBC_NO_DNS, $BBC_NO_HITS,
         $BBC_PURGE_SINGLE, $access, $last;

  // Checking whether we have new connections
  if (!$new_access = bbc_counter_to_array()) return false;

  // cleanup if requested
  if (!empty($BBC_PURGE_SINGLE)) bbc_purge_single();

  if ((!empty($access['time'])) && (is_array($access['time']))) bbc_time_offset();

  // Upgrade from older versions. We need to erase the "last" data.
  if (isset($access['last'])) unset($access['last']);

  // check for broken 0.4.2 referrer counting and apply fix if necessary
  if (isset($access) && !isset($access['bugs']['ref_fix']) && isset($access['referer']['not_specified'])) {
    bbc_fix_refer_stat(array_sum($access['referer']));
  }

  // fix wrong browser assignments
  foreach (array("java", "wwwc", "libwww") as $what) {
    if (isset($access) && isset($access['stat']['browser'][$what])) {
      $access['stat']['robot'][$what] = $access['stat']['browser'][$what];
      $access['stat']['os']['other'] -= $access['stat']['robot'][$what];

      unset($access['stat']['browser'][$what]);
    }
  }

  $nb_new_access = (!empty($new_access) && is_array($new_access)) ? count($new_access) : 0;
  $nb_last_access = (!empty($last['traffic']) && is_array($last['traffic'])) ? count($last['traffic']) : 0;

  foreach ($new_access as $connect) {
    $connect = bbc_update_connect($connect);

    // the "last reset on" flag initialisation
    // NOTE(review): the former condition additionally tested !isset($access['time']['reset']),
    // which is always true once $access['time'] is unset. Behaviour is unchanged here; confirm
    // whether a missing 'reset' inside an existing $access['time'] should be initialised, too.
    if (!isset($access['time'])) $access['time']['reset'] = $connect['time'];

    // Stop processing if bots are completely ignored
    if ((!empty($BBC_IGNORE_BOTS)) && ($BBC_IGNORE_BOTS == 2) && (!empty($connect['robot']))) {
      --$nb_new_access;
      continue;
    }

    // Omit referrers coming from robots (a missing referrer is kept as null)
    if (!empty($connect['robot'])) $connect['referer'] = "unknown";
    else $connect['referer'] = isset($connect['referer']) ? $connect['referer'] : null;

    $this_connect = $connect['time'];
    $last_connect = !empty($access['time']['last']) ? $access['time']['last'] : 0;

    // Hits as base for time stats if desired
    if (empty($BBC_NO_HITS)) bbc_update_time_stat($this_connect, $last_connect);

    // The script viewed
    if (isset($connect['page'])) $connect = bbc_update_page_stats($connect);

    $prev_recorded = 0;

    // Check if a similar connection has been recorded yet: walk back through the recorded
    // connections as long as they are less than $BBC_MAXTIME older than this one
    for ($l = $nb_last_access - 1; ($l >= 0) && (($connect['time'] - $last['traffic'][$l]['time']) < $BBC_MAXTIME);
         $l--) {
      // same visitor means same ip and, unless $BBC_IGNORE_AGENT is set, same agent
      $same_ip = ($connect['ip'] == $last['traffic'][$l]['ip']);
      $same_visitor = !empty($BBC_IGNORE_AGENT) ? $same_ip :
                      ($same_ip && ($connect['agent'] == $last['traffic'][$l]['agent']));

      if (!$same_visitor) continue;

      $last['traffic'][$l]['page'] = $connect['page'];
      $last['traffic'][$l]['time'] = $this_connect;
      $last['traffic'][$l]['visits']++;

      if (empty($access['stat']['totalvisits'])) $access['stat']['totalvisits'] = 0;
      $access['stat']['totalvisits']++;

      if ($BBC_MAXVISIBLE > 0) bbc_update_visits($connect['time'], $connect['page'], $l);

      $prev_recorded = 1;
      break;
    }

    // Add new connection if it hasn't been recorded yet
    if (!$prev_recorded) {
      if (empty($access['stat']['totalvisits'])) $access['stat']['totalvisits'] = 0;
      if (empty($access['stat']['totalcount'])) $access['stat']['totalcount'] = 0;

      $connect['dns'] = !empty($BBC_NO_DNS) ? $connect['ip'] : bbc_clean(gethostbyaddr($connect['ip']));
      $connect['ext'] = bbc_get_extension($connect['dns'], $connect['ip']);

      $last['traffic'][$nb_last_access] = bbc_update_access($connect);
      // Visit stats
      $last['traffic'][$nb_last_access]['views'][] = $last['traffic'][$nb_last_access]['time']."|"
                                                     .$last['traffic'][$nb_last_access]['page']."|1";

      // Unique visits as base for time stats if desired
      if (!empty($BBC_NO_HITS)) bbc_update_time_stat($this_connect, $last_connect);

      // Must be reset for every connection: without a referrer the keywords of the previous
      // connection would be recorded again (or the variable would not exist at all)
      $flt_search = false;

      // Referrers collection will be updated all along with the keywords if available
      if (isset($connect['referer'])) {
        bbc_update_referer_stat($connect['referer']);
        $flt_search = bbc_get_keywords($connect['referer']);
      }
      // The search as a whole in $last
      $last['traffic'][$nb_last_access]['search'] = ($flt_search !== false) ? implode(" ", $flt_search) : "-";

      // The host listing
      if ((isset($connect['dns'])) && (isset($connect['ip']))) {
        bbc_update_host_stat($last['traffic'][$nb_last_access]);
      }

      $access['stat']['totalvisits']++;
      $access['stat']['totalcount']++;
      $nb_last_access++;
    }
  }
  return $nb_new_access;
}
300
// Tidy up $last['traffic'].
// Without a usable list, or if $BBC_MAXVISIBLE is not positive, the list is set to an empty
// array. Else every connection that precedes the last $BBC_MAXVISIBLE positions of the list
// is removed if it is more than $BBC_MAXTIME older than $BBC_TIMESTAMP shifted by
// $BBC_TIME_OFFSET minutes. What remains is sorted by time with bbc_sort_time_sc().
function bbc_update_last_access() {
  global $last, $BBC_MAXTIME, $BBC_MAXVISIBLE, $BBC_TIMESTAMP, $BBC_TIME_OFFSET;

  $has_traffic = (!empty($last['traffic'])) && (is_array($last['traffic']));

  if (($BBC_MAXVISIBLE <= 0) || (!$has_traffic)) {
    $last['traffic'] = array();
    return;
  }

  $now = $BBC_TIMESTAMP + ($BBC_TIME_OFFSET * 60);
  // highest position that is not protected by the $BBC_MAXVISIBLE limit
  $pos = count($last['traffic']) - 1 - $BBC_MAXVISIBLE;

  while ($pos >= 0) {
    $age = $now - $last['traffic'][$pos]['time'];

    if ($age > $BBC_MAXTIME) unset($last['traffic'][$pos]);

    $pos--;
  }

  usort($last['traffic'], "bbc_sort_time_sc");
}
320 ?>

Properties

Name Value
svn:keywords Id