ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/BBClone/trunk/log_processor.php
Revision: 18
Committed: Thu Nov 21 13:13:49 2013 UTC (11 years ago) by matthys
File size: 11578 byte(s)
Log Message:
Setup root files.

File Contents

# User Rev Content
1 matthys 18 <?php
2     /* This file is part of BBClone (A PHP based Web Counter on Steroids)
3     *
4     * SVN FILE $Id: authors.txt 6 2013-11-21 12:36:57Z matthys $
5     *
6     * Copyright (C) 2001-2013, the BBClone Team (see doc/authors.txt for details)
7     *
8     * This program is free software: you can redistribute it and/or modify
9     * it under the terms of the GNU General Public License as published by
10     * the Free Software Foundation, either version 3 of the License, or
11     * (at your option) any later version.
12     *
13     * This program is distributed in the hope that it will be useful,
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     * GNU General Public License for more details.
17     *
18     * See doc/copying.txt for details
19     */
20    
21     ///////////////////
22     // Log Processes //
23     ///////////////////
24    
// Include guard: stop evaluating this file unless the _MARK_PAGE constant
// exists (presumably defined by the script that includes this file).
if (defined("_MARK_PAGE") === false) {
    return;
}
27    
// Comparison callback for usort(): orders rows by their 'time' entry, oldest first.
// Returns 0 when both times compare (loosely) equal, 1 when $row_a is the later
// one and -1 otherwise.
function bbc_sort_time_sc($row_a, $row_b) {
    $time_a = $row_a['time'];
    $time_b = $row_b['time'];

    if ($time_a == $time_b) {
        return 0;
    }

    if ($time_a > $time_b) {
        return 1;
    }

    return -1;
}
33    
// Returns the key of the first element of $array whose value loosely equals $str.
//
// $array  array to search (anything that is not an array yields 0)
// $str    value to look for, compared with ==
//
// Returns the matching key, or 0 when nothing matches. Note that "not found" is
// therefore indistinguishable from a match at key 0; callers have to make sure
// the value is present when that difference matters.
function bbc_get_key($array, $str) {
    if (!is_array($array)) return 0;

    // foreach replaces the old reset()/each() idiom: each() was deprecated in
    // PHP 7.2 and removed in PHP 8.0.
    foreach ($array as $idx => $val) {
        if ($val == $str) return $idx;
    }
    return 0;
}
43    
// Purges every entry with a score of exactly 1 from the "host", "key" and
// "referer" tables of the global $access array.
//
// For each purged referer the 'not_specified' bucket of the referer table is
// incremented by one. Categories that are missing, empty or not arrays are
// skipped.
function bbc_purge_single() {
    global $access;

    foreach (array("host", "key", "referer") as $cat) {
        if ((empty($access[$cat])) || (!is_array($access[$cat]))) continue;

        // Walk a snapshot of the keys but read the live score, so the table can be
        // modified safely while looping (each() no longer exists in PHP 8).
        foreach (array_keys($access[$cat]) as $key) {
            // The referer catch-all bucket is the target of the purge: removing it
            // and re-creating it at 1 would be a no-op, so it is left untouched.
            if (($cat == "referer") && ($key === "not_specified")) continue;

            if ($access[$cat][$key] != 1) continue;

            unset($access[$cat][$key]);

            if ($cat == "referer") {
                if (!isset($access[$cat]['not_specified'])) $access[$cat]['not_specified'] = 0;
                ++$access[$cat]['not_specified'];
            }
        }
    }
}
59    
// Counts the visitor's host in $access['host'].
//
// Only the significant tail of the hostname is kept, since full hostnames would
// make the ranking useless:
//   - unresolved hosts (dns equals ip) are recorded as the address up to its last
//     dot, followed by ".&nbsp;-"
//   - names with more than two labels keep their last three labels when the last
//     label is not one of the generic domains below and the second-to-last label
//     is shorter than three characters or is itself a generic domain; otherwise
//     they keep their last two labels
//   - shorter names are used as they are
// Nothing is counted when $BBC_IGNORE_BOTS is set and the client carries a
// 'robot' entry. The 'not_specified' host entry is removed in every case.
function bbc_update_host_stat($client) {
    global $access, $BBC_IGNORE_BOTS;

    $count_client = (empty($BBC_IGNORE_BOTS)) || (!isset($client['robot']));

    if ($count_client) {
        // these ones can have less than 3 characters as hostname
        $generic = array("org", "com", "edu", "mil", "net", "gov", "int");

        if ($client['dns'] == $client['ip']) {
            $prefix = substr($client['ip'], 0, strrpos($client['ip'], "."));
            $host = trim($prefix).".&nbsp;-";
        }
        else {
            $labels = explode(".", strtolower($client['dns']));
            $total = count($labels);

            if ($total > 2) {
                $top = trim($labels[($total - 1)]);
                $second = $labels[($total - 2)];
                $short_or_generic = (strlen(trim($second)) < 3) || (in_array($second, $generic));
                $keep = ((!in_array($top, $generic)) && ($short_or_generic)) ? 3 : 2;

                $host = trim(implode(".", array_splice($labels, -$keep)));
            }
            else {
                $host = trim(implode(".", $labels));
            }
        }

        if (!isset($access['host'][$host])) {
            $access['host'][$host] = 0;
        }
        $access['host'][$host]++;
    }

    if (isset($access['host']['not_specified'])) {
        unset($access['host']['not_specified']);
    }
}
88    
// Maintains the list of pages viewed during visit number $nr in
// $last['traffic'][$nr]['views']. Each entry is a "time|page|count" string.
//
// $time  time of the current hit
// $page  page identifier of the current hit (compared as an integer)
// $nr    index of the visit inside $last['traffic']
//
// When the most recent entry refers to the same page, its counter is raised and
// the visit's 'off' value is incremented; otherwise a new "time|page|1" entry is
// appended. The list is then sorted and cut down to its last $BBC_MAXVISIBLE
// entries. The 'off' value is always initialised; nothing else happens when the
// visit has no usable 'views' list.
function bbc_update_visits($time, $page, $nr) {
    global $BBC_MAXVISIBLE, $last;

    $last['traffic'][$nr]['off'] = !empty($last['traffic'][$nr]['off']) ? $last['traffic'][$nr]['off'] : 0;

    // This guard has to run before the list is read: count() on a missing list is
    // a TypeError on PHP 8 and the substr() calls below would work on garbage.
    if ((empty($last['traffic'][$nr]['views'])) || (!is_array($last['traffic'][$nr]['views']))) return;

    $lv = count($last['traffic'][$nr]['views']) - 1;
    $entry = $last['traffic'][$nr]['views'][$lv];

    // time = text before the first "|", count = text after the last "|",
    // page = text between the first and the second "|"
    $last_time = substr($entry, 0, strpos($entry, "|"));
    $last_cnt = substr($entry, (strrpos($entry, "|") + 1));
    $last_page = substr($entry, (strpos($entry, "|") + 1));
    $last_page = substr($last_page, 0, strpos($last_page, "|"));

    if (intval($last_page) === intval($page)) {
        $last['traffic'][$nr]['views'][$lv] = "$last_time|$last_page|".++$last_cnt;
        $last['traffic'][$nr]['off']++;
    }
    else $last['traffic'][$nr]['views'][] = "$time|$page|1";

    // NOTE(review): entries are sorted as plain strings, which equals chronological
    // order only while all time stamps have the same number of digits.
    sort($last['traffic'][$nr]['views']);

    // keep only the last $BBC_MAXVISIBLE entries
    $excess = count($last['traffic'][$nr]['views']) - $BBC_MAXVISIBLE;

    if ($excess > 0) {
        $last['traffic'][$nr]['views'] = array_slice($last['traffic'][$nr]['views'], $excess);
    }
}
116    
// Updates the ranking of the most visited pages and converts the page title of
// a connection into its index inside $last['pages'].
//
// $connect  connection record; its 'page' (title) and 'uri' entries are read
//
// Titles longer than 60 characters are shortened to "..." plus their last 57
// characters; statistics and $last['pages'] entries still stored under the full
// title are moved to the shortened one. The page counter in $access['page'] is
// incremented and its 'uri' refreshed.
//
// Returns $connect with 'page' replaced by the title's key in $last['pages'] and
// 'uri' removed.
function bbc_update_page_stats($connect) {
    global $access, $last;

    $full_title = $connect['page'];
    $too_long = strlen($full_title) > 60;
    $title = $too_long ? "...".substr($full_title, -57) : $full_title;
    $connect['page'] = $title;

    // Fix oversized page titles
    if ($too_long && isset($access['page'][$full_title]['count'])) {
        $access['page'][$title]['count'] = $access['page'][$full_title]['count'];
        $access['page'][$title]['uri'] = $access['page'][$full_title]['uri'];
        unset($access['page'][$full_title]);
    }

    if (!isset($access['page'][$title]['count'])) {
        $access['page'][$title]['count'] = 0;
    }

    $access['page'][$title]['count']++;
    $access['page'][$title]['uri'] = $connect['uri'];

    if (empty($last['pages']) || !is_array($last['pages'])) {
        $last['pages'] = array();
    }

    // rename an entry that is still listed under the full title
    if ($too_long && in_array($full_title, $last['pages'])) {
        $old_key = bbc_get_key($last['pages'], $full_title);
        $last['pages'][$old_key] = $title;
    }

    if (!in_array($title, $last['pages'])) {
        $last['pages'][] = $title;
    }

    $connect['page'] = bbc_get_key($last['pages'], $title);

    unset($connect['uri']);

    return $connect;
}
153    
// Transfers the new raw connections (as returned by bbc_counter_to_array()) into
// $last and updates the global statistics kept in $access.
//
// A connection counts as a further hit of an already recorded visit when an
// entry of $last['traffic'] that is less than $BBC_MAXTIME older has the same ip
// (and the same agent, unless $BBC_IGNORE_AGENT is set). Otherwise it is added
// to $last['traffic'] as a new visit.
//
// Returns false when there are no new connections, otherwise the number of new
// connections (not counting robots skipped because $BBC_IGNORE_BOTS is 2).
function bbc_add_new_connections_to_old() {
    global $BBC_IGNORE_AGENT, $BBC_IGNORE_BOTS, $BBC_MAXTIME, $BBC_MAXVISIBLE, $BBC_NO_DNS, $BBC_NO_HITS,
           $BBC_PURGE_SINGLE, $access, $last;

    // Checking whether we have new connections
    if (!$new_access = bbc_counter_to_array()) return false;

    // cleanup if requested
    if (!empty($BBC_PURGE_SINGLE)) bbc_purge_single();

    if ((!empty($access['time'])) && (is_array($access['time']))) bbc_time_offset();

    // Upgrade from older versions. We need to erase the "last" data.
    if (isset($access['last'])) unset($access['last']);

    // check for broken 0.4.2 referrer counting and apply fix if necessary
    if (isset($access) && !isset($access['bugs']['ref_fix']) && isset($access['referer']['not_specified'])) {
        bbc_fix_refer_stat(array_sum($access['referer']));
    }

    // fix wrong browser assignments: these agents are moved to the robot stats
    foreach (array("java", "wwwc", "libwww") as $what) {
        if (isset($access) && isset($access['stat']['browser'][$what])) {
            $access['stat']['robot'][$what] = $access['stat']['browser'][$what];
            $access['stat']['os']['other'] -= $access['stat']['robot'][$what];

            unset($access['stat']['browser'][$what]);
        }
    }

    $nb_new_access = (!empty($new_access) && is_array($new_access)) ? count($new_access) : 0;
    $nb_last_access = (!empty($last['traffic']) && is_array($last['traffic'])) ? count($last['traffic']) : 0;

    foreach ($new_access as $connect) {
        $connect = bbc_update_connect($connect);

        // the "last reset on" flag initialisation
        // NOTE(review): the second test can never change the outcome, so the flag is
        // only set while $access['time'] does not exist at all - confirm whether
        // "||" was intended.
        if ((!isset($access['time'])) && (!isset($access['time']['reset']))) {
            $access['time']['reset'] = $connect['time'];
        }

        // Stop processing if bots are completely ignored
        if ((!empty($BBC_IGNORE_BOTS)) && ($BBC_IGNORE_BOTS == 2)) {
            if (!empty($connect['robot'])) {
                --$nb_new_access;
                continue;
            }
        }
        // Omit referrers coming from robots
        $connect['referer'] = !empty($connect['robot']) ? "unknown" : $connect['referer'];

        $this_connect = $connect['time'];
        $last_connect = !empty($access['time']['last']) ? $access['time']['last'] : 0;

        // Hits as base for time stats if desired
        if (empty($BBC_NO_HITS)) bbc_update_time_stat($this_connect, $last_connect);

        // The script viewed
        $connect = isset($connect['page']) ? bbc_update_page_stats($connect) : $connect;
        $prev_recorded = 0;

        // Check if a similar connection has been recorded yet, newest entries first
        for ($l = $nb_last_access - 1; ($l >= 0) && (($connect['time'] - $last['traffic'][$l]['time']) < $BBC_MAXTIME);
             $l--) {
            if (!empty($BBC_IGNORE_AGENT) ? ($connect['ip'] == $last['traffic'][$l]['ip']) :
                (($connect['ip'] == $last['traffic'][$l]['ip']) && ($connect['agent'] == $last['traffic'][$l]['agent']))) {
                $last['traffic'][$l]['page'] = $connect['page'];
                $last['traffic'][$l]['time'] = $this_connect;
                $last['traffic'][$l]['visits']++;
                $access['stat']['totalvisits']++;

                if ($BBC_MAXVISIBLE > 0) bbc_update_visits($connect['time'], $connect['page'], $l);

                $prev_recorded = 1;
                break;
            }
        }

        // Add new connection if it hasn't been recorded yet
        if (!$prev_recorded) {
            if (empty($access['stat']['totalvisits'])) $access['stat']['totalvisits'] = 0;
            if (empty($access['stat']['totalcount'])) $access['stat']['totalcount'] = 0;

            $connect['dns'] = !empty($BBC_NO_DNS) ? $connect['ip'] : bbc_clean(gethostbyaddr($connect['ip']));
            $connect['ext'] = bbc_get_extension($connect['dns'], $connect['ip']);

            $last['traffic'][$nb_last_access] = bbc_update_access($connect);
            // Visit stats
            $last['traffic'][$nb_last_access]['views'][] = $last['traffic'][$nb_last_access]['time']."|"
                                                          .$last['traffic'][$nb_last_access]['page']."|1";

            // Unique visits as base for time stats if desired
            if (!empty($BBC_NO_HITS)) bbc_update_time_stat($this_connect, $last_connect);

            // Start without keywords for every connection: otherwise a visit without a
            // referer would read an undefined variable or inherit the keywords of the
            // connection processed before it.
            $flt_search = false;

            // Referrers collection will be updated all along with the keywords if available
            if (isset($connect['referer'])) {
                bbc_update_referer_stat($connect['referer']);
                $flt_search = bbc_get_keywords($connect['referer']);
            }
            // The search as a whole in $last
            $last['traffic'][$nb_last_access]['search'] = ($flt_search !== false) ? implode(" ", $flt_search) : "-";

            // The host listing
            if ((isset($connect['dns'])) && (isset($connect['ip']))) {
                bbc_update_host_stat($last['traffic'][$nb_last_access]);
            }

            $access['stat']['totalvisits']++;
            $access['stat']['totalcount']++;
            $nb_last_access++;
        }
    }
    return $nb_new_access;
}
270    
// Trims $last['traffic'].
//
// The list is emptied when $BBC_MAXVISIBLE is not positive or when there is no
// usable traffic list. Otherwise every entry positioned before the last
// $BBC_MAXVISIBLE ones is dropped if it is more than $BBC_MAXTIME older than the
// current time ($BBC_TIMESTAMP plus $BBC_TIME_OFFSET minutes); the remaining
// entries are then re-sorted by time with bbc_sort_time_sc().
function bbc_update_last_access() {
    global $last, $BBC_MAXTIME, $BBC_MAXVISIBLE, $BBC_TIMESTAMP, $BBC_TIME_OFFSET;

    $nothing_to_keep = ($BBC_MAXVISIBLE <= 0) || (empty($last['traffic'])) || (!is_array($last['traffic']));

    if ($nothing_to_keep) {
        $last['traffic'] = array();
        return;
    }

    $now = $BBC_TIMESTAMP + ($BBC_TIME_OFFSET * 60);

    // first candidate: the entry just in front of the $BBC_MAXVISIBLE newest ones
    $idx = count($last['traffic']) - 1 - $BBC_MAXVISIBLE;

    while ($idx >= 0) {
        $age = $now - $last['traffic'][$idx]['time'];

        if ($age > $BBC_MAXTIME) {
            unset($last['traffic'][$idx]);
        }
        $idx--;
    }

    usort($last['traffic'], "bbc_sort_time_sc");
}
290     ?>