From 8aa79b33a9ec46e5f7f9df11ac2a4e2e23637cfb Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Sun, 25 Nov 2007 09:58:12 +0100 Subject: [PATCH 1/1] Initial commit: Imported yaala 0.7.3. --- AUTHORS | 13 ++ CHANGELOG | 410 +++++++++++++++++++++++++++++++++++ COPYING | 339 +++++++++++++++++++++++++++++ README | 52 +++++ README.persistency | 55 +++++ README.selections | 163 ++++++++++++++ config | 129 +++++++++++ lib/Yaala/Config.pm | 287 ++++++++++++++++++++++++ lib/Yaala/Data/Convert.pm | 238 ++++++++++++++++++++ lib/Yaala/Data/Core.pm | 315 +++++++++++++++++++++++++++ lib/Yaala/Data/Persistent.pm | 226 +++++++++++++++++++ lib/Yaala/Data/Setup.pm | 241 +++++++++++++++++++++ lib/Yaala/Html.pm | 203 +++++++++++++++++ lib/Yaala/Parser/Bind9.pm | 157 ++++++++++++++ lib/Yaala/Parser/Common.pm | 137 ++++++++++++ lib/Yaala/Parser/Iptables.pm | 283 ++++++++++++++++++++++++ lib/Yaala/Parser/Ncsa.pm | 193 +++++++++++++++++ lib/Yaala/Parser/Netacct.pm | 114 ++++++++++ lib/Yaala/Parser/Postfix.pm | 226 +++++++++++++++++++ lib/Yaala/Parser/Squid.pm | 139 ++++++++++++ lib/Yaala/Parser/WebserverTools.pm | 292 +++++++++++++++++++++++++ lib/Yaala/Parser/Wnserver.pm | 196 +++++++++++++++++ lib/Yaala/Parser/Xferlog.pm | 191 ++++++++++++++++ lib/Yaala/Report/Classic.pm | 302 ++++++++++++++++++++++++++ lib/Yaala/Report/Combined.pm | 431 +++++++++++++++++++++++++++++++++++++ lib/Yaala/Report/Core.pm | 42 ++++ lib/Yaala/Report/GDGraph.pm | 329 ++++++++++++++++++++++++++++ packaging/yaala.cron | 8 + packaging/yaala.spec | 71 ++++++ reports/dot-dark.png | Bin 0 -> 82 bytes reports/dot-light.png | Bin 0 -> 82 bytes reports/logo.png | Bin 0 -> 339 bytes reports/octo.css | 98 +++++++++ reports/qmax.css | 95 ++++++++ reports/style.css | 123 +++++++++++ sample_configs/common_log.conf | 102 +++++++++ sample_configs/squid_log.conf | 109 ++++++++++ webserver.config | 24 +++ yaala | 153 +++++++++++++ 39 files changed, 6486 insertions(+) create mode 100644 AUTHORS create mode 100644 CHANGELOG 
create mode 100644 COPYING create mode 100644 README create mode 100644 README.persistency create mode 100644 README.selections create mode 100644 config create mode 100644 lib/Yaala/Config.pm create mode 100644 lib/Yaala/Data/Convert.pm create mode 100644 lib/Yaala/Data/Core.pm create mode 100644 lib/Yaala/Data/Persistent.pm create mode 100644 lib/Yaala/Data/Setup.pm create mode 100644 lib/Yaala/Html.pm create mode 100644 lib/Yaala/Parser/Bind9.pm create mode 100644 lib/Yaala/Parser/Common.pm create mode 100644 lib/Yaala/Parser/Iptables.pm create mode 100644 lib/Yaala/Parser/Ncsa.pm create mode 100644 lib/Yaala/Parser/Netacct.pm create mode 100644 lib/Yaala/Parser/Postfix.pm create mode 100644 lib/Yaala/Parser/Squid.pm create mode 100644 lib/Yaala/Parser/WebserverTools.pm create mode 100644 lib/Yaala/Parser/Wnserver.pm create mode 100644 lib/Yaala/Parser/Xferlog.pm create mode 100644 lib/Yaala/Report/Classic.pm create mode 100644 lib/Yaala/Report/Combined.pm create mode 100644 lib/Yaala/Report/Core.pm create mode 100644 lib/Yaala/Report/GDGraph.pm create mode 100644 packaging/yaala.cron create mode 100644 packaging/yaala.spec create mode 100644 reports/dot-dark.png create mode 100644 reports/dot-light.png create mode 100644 reports/logo.png create mode 100644 reports/octo.css create mode 100644 reports/qmax.css create mode 100644 reports/style.css create mode 100644 sample_configs/common_log.conf create mode 100644 sample_configs/squid_log.conf create mode 100644 webserver.config create mode 100755 yaala diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..52b7b69 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,13 @@ +Contributions to yaala +====================== + +Mark Feenstra +- Wnserver parser + +David Augros +- Bind9 parser + +qMax +- Combined report module +- Selections +- i18n, l10n diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..08b73c3 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,410 @@ + yaala - CHANGELOG +=================== 
+http://yaala.org/ + + 0.7.3 - More bugs fixed + ========================= + - A bug in the debug-system has been fixed: Data::Dumper would not be + loaded, though it might be necessary in the selected debug-level. + - A bug with the disabling of persistency has been fixed. Thanks to qMax. + + + + 0.7.2 - Potential bug fixed + ============================= + - The modules have been moved and renamed, since a conflict with other + installed modules could appear in old versions of perl (<= 5.005). + + + + 0.7.1 - Bug fixed + =================== + - A bug in the persistency-code has been fixed: If the persistency-file + had to be created no checksum was included causing it to be overwritten + with the next run. The second run did set the checksum which is + probably why no one complained about this.. + + + + 0.7.0 - Persistent data is here + ================================= + - yaala now dumps its data into a file and may use it in subsequent + runs. This way you don't have to keep all your old logfiles. + + - A tiny fix allows yaala to run under Microsoft Windows. + + + + 0.6.8 - Workaround implemented + ================================ + - A workaround for what seems to be a bug in some versions of Perl 5.8 + has been added. + + + + 0.6.7 - Some more cosmetics + ============================= + - yaala now changes into its own directory before execution. This is + useful for cron-scripts and the like. + + - The config option ``print-graphs'' has been added for two reasons: + a) People who have GD::Graph installed can prevent yaala from + generating graphs now. + b) People who neither have GD::Graph installed nor read the readme + will hopefully find this option and will get a detailed error + message. + + + + 0.6.6 - Bug fixed + =================== + - A bug which would not let ``host_width'' be set to zero (infinite + length) has been fixed. Thanks to Rafael Santiago for reporting it.
+ + + + 0.6.5 - More aggregations + =========================== + - More than one aggregation can be selected. This works with both the + Combined and the Classic output modules. + + - Elapsed time (from the squid logfiles) is now being printed in + hh:mm:ss.f format. + + - The y-Axis of graphs is plotted in percent of the total now. + + + + 0.6.4 - Don't die on me, man! + =============================== + - yaala doesn't die anymore, if you select more than three fields with + the combined output module. An error message is printed instead. + + - Empty cells are not printed any longer by default when using the + Classic output. You can re-enable this behavior with a config-option. + + + + 0.6.3 - ... + ============= + - Chimera has been renamed to Camino; MultiZilla and Safari have been + added. + + - The sub-indices in the Classic output now provide a bit more + information.. + + - A robots-metatag has been added to prevent search engines from indexing + yaala's reports. + + + + 0.6.2 - New/Old parser and bugfixes + ===================================== + - The (old) bind9 parser has been fixed and works fine. + + - A parser for the "xferlog" (used by wu-ftpd, proftpd and maybe other + FTP-Daemons) has been added. + + - Selections with where-clauses should work better now. + + + + 0.6.1 - The return of the graphs + ================================== + - Support for GD::Graph is finally back again. yaala checks whether + GD::Graph is installed or not and behaves accordingly. You don't have + to do anything. + + - All tables now print percentages as well. + + - A parser for postfix entries in the maillog has been added. + + - A bug in Data::Setup has been fixed. It wasn't possible to select more + than three keys with the Classic output module, which is perfectly + legal.. + + + + 0.6.0 - New code, less bugs, less features + ============================================ + - Huge parts of yaala's internals have been rewritten.
The data-storage + is completely new and works better than in the 0.5.x line. + + - General code cleanup. A lot of variables have been renamed. + + - The debug-system has been unified. + + - The Classic-output plugin emulates the ``old'' (0.4.x) output. + + + + 0.5.4 - Patches by qMax + ========================= + - Input Module for method file:// has been added. + + - Fixes in Format.pm + + - Changes in qmax.css + + + + 0.5.3 - Reverse lookups + ========================= + - The ability to perform reverse lookups has been added. + + - Browsers and OSes are recognized better. The code should also be faster + now. + + + + 0.5.2 - Now comes the command line + ==================================== + - Every option from the config file has been made available from the + command line. + + - The recognition of Windows 2000 has been fixed. + + + + 0.5.1 - Changes in config-reading and -parsing + ================================================ + - config-reading and -parsing have been moved out of the main program and + into lib/Config.pm. Modules have been updated. + + - The config syntax has changed slightly. See POD in lib/Config.pm + + - webserver.config has been created. + + - A bug in lib/parser/Ncsa.pm has been fixed. + + - WebserverTools::detect_referer has been rewritten. + + + + 0.5.0 - yaala in the metamorphosis + ==================================== + Changes by octo + ----------------- + - Modules use the Exporter mechanism now to import subroutines and + variables. Renamed modules in the process. + + - Output is nearly XHTML compliant. I'm working towards total XHTML 1.1 + compatibility. + + - Added logo.png, dot-dark.png, dot-light.png + + - Removed logo.gif, dot0.gif, dot1.gif + + - Added new stylesheet and set as default. The old stylesheet has been + renamed to "qmax.css" + + - Graphics cannot be generated with this release. This option might come + back in some later release. + + - Recognition of Nimda/CodeRed attacks has been removed.
+ + - A few obvious bugs found but not yet fixed. See TODO. + + Changes by qMax + ----------------- + * yaala + Added new config options, removed old for backward incompatibility; + Changed some defaults; + Removed 'color' options - and defined them in html/style.css; + Made preserving spaces, semicolons and capital letters in quoted + config parameters (for date/time formats and filenames); + Added 'is_list' options to preserve order of parameter appearance + (for 'select' directive). + Added 'configtest' run mode to test configuration. + Added some debugging. + Wrote dependencies for all my modules in top comment. + + * config + Changed to use new options, added sections HTML and i18n. + User level comments about new options. + Fixed some typos, have made new :) + + * README.grouping + Description of grouping expressions used in 'select' directive. + + * contrib/ + Several supplemental scripts. Like that, simulating passing + command line parameters to yaala, processing batch reports, + some testing. + + * html/ + Contains *.gif and style.css - a thin cyan document style. + + * reports/ + Default directory for reports. + I suggest not using html, to avoid accidental removal of + *.gifs and style.css. + Actually, 'reports' should be symlink to some www directory. + + * lib/ + Placed all (new) modules here. + + * /dev/null + Placed all old modules there. + + * lib/parser/* + Directory for parser modules. + TODO: parsers should pass month/date/time as UNIX-time + to make them properly sorted and formatted. + Currently they should work w/out i18n handling + dates and times. + + * lib/html.pm + A pair of common output utilities. + Only to generate common HTML head and foot. + Common header contains stylesheet link + and optional META http-equiv with charset. + Footer contains copyright notice and advertisements. + Top-page index is report-dependent. + + * lib/utils.pm + Some common utilities kinda cmp_arrays. + + * lib/setup.pm + Setup-parsing utilities.
To keep them all in single place. + Includes index calculator for grouping expressions and all + that stuff. + + * lib/debug.pm + A pair of debugging and profiling utils. + Enabled with $main::debug|=32; + + * lib/data.pm + Data storage module. + Supports random key grouping, several functions: + SUM, MAX, MIN, AVG, COUNT(*), COUNT(field). + Allows association of function with separate index. + Incapsulates all access to data hash. + + * lib/i18n/format.pm + Localized data formatting. + Formats date, time, datetime, bignumbers, elapsed time, + properly sorts host and domain names. + POD documented to use. + + * lib/i18n/trans.pm + Enhanced translating module. + POD documented. + There also dictionary description in en.pm. + + * lib/i18n/en.pm + English template dictionary. + Contains all messages from reports, data labels from all parsers + and labels of extra info. + Used to translate internal data labels to printable titles. + There're translation suggestions in comments inside. + + * lib/i18n/ru.pm + Russian translation with several variations of words. + Makes reports to look as they were natively russian and + natively for selected configuration. + Really. + + * lib/report/core.pm + Common part for report modules. Namespaced. + Contains setup and table-generation subroutines. + Features for crossreferenced tables. + POD documented inside. + + NB: all tables generated, except top-page index, + contain borders to be viewable in links the browser. + Lynx takes a rest anyway. + + * lib/report/combined.pm + Generates combined reports: 1D, 2D, 3Dimentional. + POD inside. + + * lib/report/top.pm + Generates usual top-N report, but with key grouping. + POD inside. + + + + 0.4.2 - Added BIND9 support + ============================= + - David Augros sent me a parser for BIND9 logfiles which I added.. 
Easy + tasks get done quick ;) + + + + 0.4.1 - security hole fixed + ============================= + - An exploitable "bug" was fixed: It was possible to fake the referer to + contain HTML and/or JavaScript code which would show up in the generated + file(s) and eventually would be interpreted by the browser. + Thanks to Liviu Daia (Liviu.Daia@imar.ro) for the hint :) + + + + 0.4 - netacct and wnserver support + ==================================== + - Two new parsers provide netacct and wnserver support. + Thanks to M. Feenstra for the wnserver support. + + - The parser modules now have to initialize their data structure before they + start parsing. This allows for different types of logfiles to be supported. + The data-structures understood are: + count: Count each appearance of a unique value (eg. Browser, Date, etc) + sum: Build the sum of all the (numeric) values. (eg. Package Count) + byte: Basically the same as sum, but print as a byte-value + + - Use of CSS (Cascading Style Sheets) in the HTML output. Results are smaller + files and the source is easier to read. + + - You can specify a directory to create the output files in. + Be sure to copy logo.gif, dot0.gif and dot1.gif into this directory! + + + + 0.3 - Clean(er) code and squid support + ======================================== + - The modules are now a lot cleaner and easier to understand. Various routines + have been moved and renamed so the namespaces are used in a more organized + manner. + + - There's a new input (parse) module for squid logfiles. + + - The routines which save the data in a huge hash have been altered so they + don't depend on the type of data that is parsed at all. This was necessary + in order to get squid to work. + + - The graph module now generates PNG graphics; its configuration hash was + moved into the main configuration. + + - The main page has some stats about CodeRed and Nimda attacks now. You may + choose to not include these requests in your stats..
+ + - Correction of some regexps and therefore (hopefully) a better performance. + + + + 0.2 - More modules - more comfort + =================================== + - The structure is now _very_ modular + + - A config file makes customization really easy + + - Apache's access-logs and NCSA-conform logfiles now understood + + - Another module provides another look (Top10) + + - CHANGELOG looks nicer ;) + + - Some changes in the makegraph.pm make the graphs look a lot better now. + + + + 0.1 - Initial version + ======================= + - No config-file + + - Parsing of multiple files + + - Support for apache's combined-log-format + + - Modular structure provides an easy way to code support for other formats. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..a43ea21 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/README b/README new file mode 100644 index 0000000..1c894c8 --- /dev/null +++ b/README @@ -0,0 +1,52 @@ + yaala 0.7.3 - README - 2004-11-10 +=================================== +http://yaala.org/ + + + Table of Contents +------------------- +1. Brief description +2. Setting it up +3. Using it + + + 1. Brief description +====================== +"yaala" parses logfiles and generates statistics in the HTML-format. In +theory just about every (non-binary) logfile in existence can be parsed. +However, you might need to write the parser yourself. The parsing modules +that come with the package are: NCSA (= Apache Combined logs), Apache +Common logs (= access logs), wnserver logs (the verbose format), squid's +access logs, Postfix entries in the maillog and the xferlog used by some +FTP servers. + +Since I do not have any logfiles for ``wnserver'' I could not test the +parser for it! + + + 2. Setting it up +================== +You need to edit the "config" file. It is documented and shouldn't be a +problem. Take a close look at the 'select'-statement. It is used to select +the information you want to have reported. The syntax is described +in ``README.selections''. + +Once you're familiar with all the config options you may use _ALL_ of them +in the command line. This is useful for scripts which generate more than +one report automatically. However, it sometimes is handy to do things +without touching the config file. + + + 3. Using it +============= +That's really simple, once it's configured: + +octo@leeloo:~/yaala-0.7.1 $ ./yaala <FILE1> [FILE2 FILE3 ...] + +You can define as many logfiles as you want, but keep in mind that you +might get really big html-files. "yaala" will automatically create and/or +overwrite existing files. Please make sure to set the 'directory' option +appropriately.
+ +-- +octo (at verplant.org) diff --git a/README.persistency b/README.persistency new file mode 100644 index 0000000..0e04f3a --- /dev/null +++ b/README.persistency @@ -0,0 +1,55 @@ + yaala 0.7.3 - README.persistency - 2004-11-10 +=============================================== +http://yaala.org/ + + +Since version 0.7.0 yaala can dump its internal data into a file and +read it the next time it runs to re-use already-parsed data. This may be +a speedup if you parse very big logfiles and is very useful for the +daily runs. This may mean that you can delete old logfiles while still +seeing their data in the reports. + +That is pretty cool, at least in my opinion. But it comes with a price: +You used to be able to parse the logfiles in whatever order you want. +This is not possible anymore since the parsers will think they already +have that data if newer data exists. Maybe there will be a switch to +turn that off in future versions but right now there is no such thing. + +Also, since yaala is very flexible it doesn't always make sense to use +the saved data. So if you play around it may happen that the earlier +saved data is not being used but overwritten instead. Here I want to +explain when this happens: + +Along with the persistent data two config options are saved in the +persistent-data file: ``logtype'' and (all) ``select''. These config +options are checked against the ones coming from the config-file or +command-line. If they don't match precisely the persistency-file will be +overwritten. + +This sounds a bit limiting at first. But keep in mind that you can +specify different files to store the persistency-information in. So you +can realize something like that easily as a cron-job: +--sample-- + #!/bin/bash + + /path/to/yaala --persistency_file "data/squid" --logtype Squid \ + --select "bytes BY user" /var/log/squid/access.log + /path/to/yaala --persistency_file "data/apache" --logtype Common \ + --select "requests BY date, hour" /var/log/httpd/access_log + ...
+--sample-- + +Of course you can also use different config-files and just use something +like this (save all the config options in extra config-files): +--sample-- + #!/bin/bash + + /path/to/yaala --config "squid.conf" + /path/to/yaala --config "apache.conf" +--sample-- + +If all this confuses you, you can turn off persistency in the config +file. Have fun ;) + +-- +octo (at verplant.org) diff --git a/README.selections b/README.selections new file mode 100644 index 0000000..609e231 --- /dev/null +++ b/README.selections @@ -0,0 +1,163 @@ + yaala 0.7.3 - README.selections - 2004-11-10 +============================================== +http://yaala.org/ + + +One of the key features of yaala is that you can select the data printed +in the reports yourself. This is done using one or more select statements +which can be configured either in the config file or in the command line. + +GENERAL SYNTAX +-------------- +First you have to know that there are two types of fields: normal fields +(sometimes also called 'key') and aggregations. An aggregation is +basically everything you can sum up. In a webserver logfile this would be +the amount of bytes transferred and the number of requests. The keyfields +are everything else, e.g. the status code, because it doesn't make sense to +sum it up. + +The syntax for select-statements is a bit like SQL. A basic select looks +as follows: + select: "aggregation BY field"; + +This displays, for example, the amount of bytes transferred on each day. +For more detailed output you can select more than one (key)field. (The +combined output module supports up to three fields.) The fields have to be +comma-separated: + select: "aggregation BY field0, field1, field2"; + +If you are interested in more than one aggregation for the same +(combination of) fields, you can select more than one aggregation, too. +However, this tends to look confusing in the generated output.
+ select: "aggregation0, aggregation1 BY field0, field1"; + +Ok, now you might only be interested in a part of all the requests. For +example you might wonder, how many times google has visited each file. You +can do this like this: + select: "requests BY file WHERE host =~ google"; + +Or, more generally, like this: + select: "aggregation BY field[, field ..] WHERE field <op> value"; + +'<op>' is the rule how to match the values. Methods implemented are: + '==' equal + '!=' not equal + '=~' regular expression (non-numeric only) + '!~' negated regular expression (non-numeric only) + '<', '>' less/greater than + '<=', '>=' less/greater than or equal + + +FIELDS PROVIDED BY PARSERS +-------------------------- +Which fields are available depends on the parser being used. A list of all +fields available from each parser follows: + +Fields provided by the 'Bind9' parser: +Aggregations: +- requests +Keyfields: +- date +- hour +- client +- query +- class +- type +- severity +- category + +Fields provided by the 'Common' parser: +Aggregations: +- bytes +- requests +Keyfields: +- date +- file +- host +- hour +- status +- tld + +Fields provided by the 'Ncsa' parser: +Aggregations: +- bytes +- requests +Keyfields: +- browser +- date +- file +- host +- hour +- os +- referer +- status +- tld +- user +- virtualhost + +Fields provided by the 'Squid' parser: +Aggregations: +- bytes +- elapsed +- requests +Keyfields: +- client +- date +- hierarchycode +- hour +- httpstatus +- method +- mime +- peer +- protocol +- resultcode +- server + +Fields provided by the 'Xferlog' parser: +Aggregations: +- bytes +- count +Keyfields: +- host +- user +- access_mode +- date +- hour +- file +- completion_status +- direction +- transfer_type +- transfer_time +- special_action + +Fields provided by the 'Postfix' parser: +Aggregations: +- count +- bytes +Keyfields: +- date +- hour +- sender +- recipient +- defer_count +- delay +- incoming_host +- outgoing_host + +Fields provided by the 'Netacct' parser: +Please
check/edit netacct.config, too!! +(EXPERIMENTAL!) +Aggregations: +- bytes +- packetcount +- connections +Keyfields: +- date +- destination +- destinationport +- hour +- interface +- month +- protocol +- source +- sourceport diff --git a/config b/config new file mode 100644 index 0000000..dec8e36 --- /dev/null +++ b/config @@ -0,0 +1,129 @@ +######################################################################## +# yaala 0.7.3 config 2004-11-10 # +#---====================-----------------------------------------------# +# http://yaala.org/ # +# For exact instructions please see the README and the notes above # +# each entry. # +######################################################################## +# $Id: config,v 1.12 2004/11/10 10:07:43 octo Exp $ + +# Tells yaala the directory to save the html pages in. +# You should manually copy .gif and .css there from html +# directory. +# Default is 'reports' +#directory: 'reports'; + +# Here you can choose between the ``new'' Combined-output module and the +# Classic-output which emulates 0.4.x behaviour. +# Default is to use 'Combined' +#report: 'Combined'; + +# The module used for parsing the logfile(s) +# The modules coming with this package are: +# - Bind9 +# - Common +# - Ncsa +# - Wnserver +# - Squid +# - Xferlog +# - Postfix +# Default: 'Common' +#logtype: 'Common'; + + +######################################################################### +# Output # +#---========------------------------------------------------------------# +# The directive 'select' selects data to be printed in the report. 
# +# For an explanation please read ``README.selections'' # +######################################################################### + +#select: "aggregation BY field, field, field WHERE field == value"; + + +######################################################################## +# Filtering # +#---===========--------------------------------------------------------# +# These options adjust filtering data which appear in reports. # +######################################################################## + +# Whether or not yaala shall try to lookup domain names of ip addresses. +# Set to 'true' or 'false'. Default is not to. +#reverse_lookup: 'true'; + +# Sets how many subdomains of a host should be displayed. "1" means only +# the domain (plus the top-level domain), e.g. "example.com", "2" would +# be ``subdomain.example.com''. Set zero to get the full length of a +# hostname. This option also controls whether unresolved IP addresses are +# displayed as ``192.0.0.0/8'' (host_width = 1), ``192.168.0.0/16'' +# (host_width = 2), etc. +# Defaults to "1" +#host_width: 1; + +# With the classic output module not all combinations of fields appear in +# the log and are therefore empty. These empty cells are normally skipped. +# If you, for whatever reason, want these cells to be printed, set the +# following option to 'false'. +#classic_skip_empty: true; + + +######################################################################## +# HTML # +#---======-------------------------------------------------------------# +# These options affect html files generation, mostly - the HEAD # +# section. # +######################################################################## + +# If you're going to browse html pages from FILES +# rather than via http AND on OS with another +# default charset, specify charset of your html +# pages to put into META http-equiv tag. +# With webserver, proper charset SHOULD be passed +# in http header by server. +# Default is 'iso-8859-1'.
+#html_charset: iso-8859-1; + +# URL to css file with style definition for +# report pages. It is linked from the html head. +# You may put a URL or path to another css file here, +# (maybe - site-wide or reports-wide) +# default is 'style.css' (should be copied where reports lie) +#html_stylesheet: '/default.css'; +#html_stylesheet: '/yaala-reports/style.css'; +html_stylesheet: 'style.css'; + + +######################################################################## +# Graphs # +#---========-----------------------------------------------------------# +# These options affect the generation of graphs and their size. If # +# unsure leave this untouched. The defaults are set to reasonable # +# values. # +######################################################################## + +# Sets whether or not graphs will be generated. Defaults to generating +# graphs if GD::Graph is installed, and to not generating them if it is not. +#print-graphs: 'true'; + +# The following two options control the size of the graphs generated. +# Values are pixels. +#graph_height: 250; +#graph_width: 500; + + +######################################################################## +# Persistency # +#---=============------------------------------------------------------# +# These options determine if persistency is used and which file the # +# data is stored in. If unsure don't touch. # +######################################################################## + +# Sets whether or not persistency should be used. For this to work you +# need to have the ``Storable'' module installed. If unset the module +# will look for ``Storable'' and if it can find it will use persistency. +#use_persistency: 'true'; + +# Sets the file used to store the persistency data in. If you use a +# relative filename please keep in mind that it is relative to yaala's +# directory. Defaults to ``persistency.data''.
+#persistency_file: 'persistency.data'; diff --git a/lib/Yaala/Config.pm b/lib/Yaala/Config.pm new file mode 100644 index 0000000..efc5def --- /dev/null +++ b/lib/Yaala/Config.pm @@ -0,0 +1,287 @@ +package Yaala::Config; + +use strict; +use warnings; +use Exporter; + +@Yaala::Config::EXPORT_OK = qw/get_config parse_argv read_config get_checksum/; + +@Yaala::Config::ISA = ('Exporter'); + +=head1 Config.pm + +Parsing of configuration files and query method. + +=head1 Usage + +use Yaala::Config qw#get_config read_config#; + +read_config ("filename"); +read_config ($filehandle); + +get_config ("key"); + +get_checksum (); + +=head1 Config Syntax + +Here are the syntax rules: + +=over 4 + +=item * + +An option starts with a keyword, followed by a colon, then the value for +that key and is ended with a semi-colon. Example: + +keyword: value; + +=item * + +Text in single- or double quotes is taken literally. Quotes can not be +escaped. However, singlequotes enclosed in double quotes (and vice versa) +are perfectly ok. Examples: + +teststring: "Yay, it's a string!"; + +html: ''; + +=item * + +Hashes start comments, which are ignored to the end of the line. Hashes +enclosed in quotes are B<not> interpreted as comments.. See html-example +above.. + +=item * + +Linebreaks and spaces (unless when in quotes..) are ignored. Strings may +not span multiple lines. Use something along these lines instead: + +multiplelineoption: "This is a very very long" + "string that continues in the next line"; + +=item * + +Any key may occur more than once.
You can separate two or more values with +commas: + +key: value1, value2, "This, is ONE value.."; + +key: value4; + +=back + +=head1 Interna + +=head2 Structure of $config + +C<$config-E{'key'} = ['val0', 'val1', ...];> + +=cut + +our $config = {}; + +my $VERSION = '$Id: Config.pm,v 1.4 2003/12/07 14:52:02 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +=head2 get_config ($key) + +Queries the config structure for the given key and returns the value(s). +In list context all values are returned, in scalar context only the most +recent one. + +=cut + +sub get_config +{ + my $key = shift; + my $val; + + if (!defined ($config->{$key})) + { + return (wantarray () ? () : ''); + } + + $val = $config->{$key}; + + if (wantarray ()) + { + return (@$val); + } + else + { + return ($val->[0]); + } +} + +=head2 parse_argv (@argv) + +Parses ARGV and adds command-line options to the internal config +structure. + +=cut + +sub parse_argv +{ + my @argv = @_; + + while (@argv) + { + my $item = shift (@argv); + + if ($item =~ m/^--?(\S+)/) + { + my $key = lc ($1); + + if (!@argv) + { + print STDERR $/, __FILE__, ": No value for key '$key'", + 'present.'; + next; + } + + my $val = shift (@argv); + + push (@{$config->{$key}}, $val); + } + elsif ($item) + { + push (@{$config->{'input'}}, $item); + } + else + { + print STDERR $/, __FILE__, ': Ignoring empty argument.'; + } + } + + return (1); +} + +=head2 parse_config ($string) + +Parses $string and adds the extracted configuration options to the +internal structure. 
+ +=cut + +sub parse_config +{ + my $text = shift; + my $tmp = ''; + my @rep; + my $rep = 0; + + local ($/) = "\n"; + + $text =~ s/\r//sg; + + for (split (m/\n+/s, $text)) + { + my $line = $_; + chomp ($line); + + # escape quoted text + while ($line =~ m/^[^#]*(['"]).*?\1/) + { + $line =~ s/(['"])(.*?)\1/<:$rep:>/; + push (@rep, $2); + $rep++; + } + + $line =~ s/#.*$//; + $line =~ s/\s*//g; + + $tmp .= $line; + } + + $text = lc ($tmp); + + while ($text =~ m/(\w+):([^;]+);/g) + { + my $key = $1; + my @val = split (m/,/, $2); + + s/<:(\d+):>/$rep[$1]/eg for (@val); + + push (@{$config->{$key}}, @val); + } + + return (1); +} + +=head2 read_config ($file) + +Reads the configuration file. $file must either be a filename, a reference +to one or a reference to a filehandle. + +=cut + +sub read_config +{ + my $arg = shift; + my $fh; + my $text; + my $need_close = 0; + local ($/) = undef; # slurp mode ;) + + if (ref ($arg) eq 'GLOB') + { + $fh = $arg->{'IO'}; + } + elsif (!ref ($arg) || ref ($arg) eq 'SCALAR') + { + my $scalar_arg; + if (ref ($arg)) { $scalar_arg = $$arg; } + else { $scalar_arg = $arg; } + + if (!-e $scalar_arg) + { + print STDERR $/, __FILE__, ': Configuration file ', + "'$scalar_arg' does not exist"; + return (0); + } + + unless (open ($fh, "< $scalar_arg")) + { + print STDERR $/, __FILE__, ': Unable to open ', + "'$scalar_arg': $!"; + return (0); + } + + $need_close++; + } + else + { + my $type = ref ($arg); + + print STDERR $/, __FILE__, ": Reference type $type not ", + 'valid'; + return (0); + } + + # By now we should have a valid filehandle in $fh + + $text = <$fh>; + + close ($fh) if ($need_close); + + parse_config ($text); + + return (1); +} + +sub get_checksum +{ + my $logtype = get_config ('logtype'); + my @selects = get_config ('select'); + + my $checksum = lc ($logtype) . '::' . 
join (':', map { lc ($_) } (sort (@selects))); + + return ($checksum); +} + +=head1 Author + +Florian octo Forster Eocto@verplant.orgE diff --git a/lib/Yaala/Data/Convert.pm b/lib/Yaala/Data/Convert.pm new file mode 100644 index 0000000..bd1873b --- /dev/null +++ b/lib/Yaala/Data/Convert.pm @@ -0,0 +1,238 @@ +package Yaala::Data::Convert; + +use strict; +use warnings; + +use Exporter; +use Socket; +use Yaala::Config qw#get_config#; +use Yaala::Data::Setup qw#%DATAFIELDS#; + +@Yaala::Data::Convert::ISA = ('Exporter'); +@Yaala::Data::Convert::EXPORT_OK = qw#convert#; + +my $VERSION = '$Id: Convert.pm,v 1.7 2003/12/07 14:52:22 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +our $CACHE = {}; +our $DO_REV_LOOKUP = 0; +our $HOST_WIDTH = 1; +our $URL_FORMAT = 'host'; + +if (get_config ('reverse_lookup')) +{ + my $conf = get_config ('reverse_lookup'); + if ($conf =~ m/^(true|yes|on)$/i) + { + print STDERR $/, __FILE__, ': Will try to do reverse lookups' if ($::DEBUG & 0x40); + $DO_REV_LOOKUP = 1; + } +} + +{ + my $conf = get_config ('host_width'); + $conf =~ s/\D//g; + if ($conf ne '') + { + $HOST_WIDTH = $conf; + } +} + +if (get_config ('url_format')) +{ + my $conf = get_config ('url_format'); + if ($conf =~ m/url/i) + { + $URL_FORMAT = 'url'; + } + elsif ($conf =~ m/full/) + { + $URL_FORMAT = 'full'; + } + } + + +return (1); + +sub convert +{ + my $key = shift; + my $val = shift; + my $retval = $val; + + if (defined ($CACHE->{$key}{$val})) + { + return ($CACHE->{$key}{$val}); + } + + if (defined ($DATAFIELDS{$key})) + { + my ($class, $type) = split (m/:/, $DATAFIELDS{$key}); + + if (!defined ($type) or !$type) + { + $CACHE->{$key}{$val} = $retval if ($class eq 'key'); + return ($retval); + } + + if ($type eq 'bytes') + { + $retval = sprintf ("%.1f kByte", $val / 1024) if ($val) + } +# elsif ($type eq 'numeric') +# { +# $val =~ s/\D//g; +# if ($val) +# { +# $retval = int ($val); +# } +# else +# { +# $retval = 0; +# } +# } + elsif ($type eq 'host') 
+ { + if ($DO_REV_LOOKUP and $val =~ m/^[\d\.]+$/) + { + $retval = do_reverse_lookup ($val); + } + + if ($HOST_WIDTH) + { + if ($retval =~ m/^[\d\.]+$/) + { + if ($DO_REV_LOOKUP) + { + $retval = '*UNRESOLVED*'; + } + else + { + my ($c, $d, $e, $f) = split (m/\./, $retval, 4); + if ($HOST_WIDTH == 1) + { + $retval = "$c.0.0.0/8"; + } + elsif ($HOST_WIDTH == 2) + { + $retval = "$c.$d.0.0/16"; + } + elsif ($HOST_WIDTH == 3) + { + $retval = "$c.$d.$e.0/24"; + } + else + { + $retval = "$c.$d.$e.$f/32"; + } + } + } + else + { + my @parts = split (m/\./, $retval); + while (scalar (@parts) > ($HOST_WIDTH + 1)) + { + shift (@parts); + } + $retval = join ('.', @parts); + } + } + } + elsif ($type eq 'url') + { + my $tmpval = $val; + $tmpval =~ s#^[a-z]+://##i; + + if ($tmpval =~ m#^([^:/]+)(?::\d+)?(/[^\?]*)(.*)#) + { + my $host = $1; + my $path = $2; + my $params = $3; + + if ($DO_REV_LOOKUP and $host =~ m/^[\d\.]+$/) + { + $host = do_reverse_lookup ($host); + } + + if ($HOST_WIDTH and $host =~ m/[^\d\.]/) + { + my @parts = split (m/\./, $host); + while (scalar (@parts) > ($HOST_WIDTH + 1)) + { + shift (@parts); + } + $host = join ('.', @parts); + } + + if ($URL_FORMAT eq 'host') + { + $retval = $host; + } + elsif ($URL_FORMAT eq 'url') + { + $retval = $host . $path; + } + elsif ($URL_FORMAT eq 'full') + { + $retval = $host . $path . 
$params; + } + } + elsif ($::DEBUG) + { + print STDERR $/, __FILE__, ": Unable to parse URL: '$val'"; + } + } + elsif ($type eq 'date') + { + # for later use + } + elsif ($type eq 'time' and $class eq 'agg') + { + my $hour = 0; + my $minute = 0; + my $second = 0; + + $hour = int ($val / 3600000); $val %= 3600000; + $minute = int ($val / 60000); $val %= 60000; + $second = $val / 1000; + + $retval = sprintf ("%02u:%02u:%02.1f", $hour, $minute, $second); + } + + if ($class eq 'key') + { + $CACHE->{$key}{$val} = $retval; + } + } + + return ($retval); +} + +sub do_reverse_lookup +{ + my $ip = shift; + + return ($ip) if ($ip !~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/); + + print STDERR $/, __FILE__, ": Reverse lookup for $ip" if ($::DEBUG & 0x40); + + my $iaddr = inet_aton ($ip); + if (!defined ($iaddr)) + { + print STDERR ': Failed (not a valid IP)' if ($::DEBUG & 0x40); + return ($ip); + } + + my $host = gethostbyaddr ($iaddr, AF_INET ()); + + if ($host) + { + print STDERR ": Success ($host)" if ($::DEBUG & 0x40); + return ($host); + } + else + { + print STDERR ': Failed (unknown)' if ($::DEBUG & 0x40); + return ($ip); + } +} diff --git a/lib/Yaala/Data/Core.pm b/lib/Yaala/Data/Core.pm new file mode 100644 index 0000000..1132246 --- /dev/null +++ b/lib/Yaala/Data/Core.pm @@ -0,0 +1,315 @@ +package Yaala::Data::Core; + +use strict; +use warnings; +#use vars qw#$DATA#; + +=head1 Yaala::Data::Core + +Store data to the internal structure and retrieve it again. 
+ +=cut + +use Exporter; +use Yaala::Data::Setup qw#$USED_FIELDS $USED_AGGREGATIONS $SELECTS#; +use Yaala::Data::Convert qw#convert#; +use Yaala::Data::Persistent qw#init#; + +@Yaala::Data::Core::EXPORT_OK = qw#receive store get_values#; +@Yaala::Data::Core::ISA = ('Exporter'); + +# holds all data +#our $DATA = {}; +our $DATA = init ('$DATA', 'hash'); + +# holds the order of all fields stored in $DATA +our @FIELD_ORDER = (); + +# holds all values for each field (key) +our $VALUES_PER_FIELD = init ('$VALUES_PER_FIELD', 'hash'); + +# sort fields by occurence count in the config file. +# This _might_ speed things up. +@FIELD_ORDER = (sort { $USED_FIELDS->{$b} <=> $USED_FIELDS->{$a} } (keys %$USED_FIELDS)); + +my $VERSION = '$Id: Core.pm,v 1.13 2003/12/09 09:12:05 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +if ($::DEBUG) +{ + require Data::Dumper; + import Data::Dumper qw#Dumper#; +} + +return (1); + +=head1 Routines + +=head2 Yaala::Data::Core::delete_fields (\%data) + +Removes uninteresting fields from the hash-ref + +=cut +sub delete_fields +{ + my $data = shift; + + foreach my $key (keys %$data) + { + unless (defined ($USED_FIELDS->{$key}) + or defined ($USED_AGGREGATIONS->{$key})) + { + delete ($data->{$key}); + } + } +} + +=head2 Yaala::Data::Core:receive ($sel, $agg, \%query) + +query data from the internal structure. Takes care of wildcards (missing +keys in the query hash) itself.. + +=cut +sub receive +{ + my $sel = shift; + my $agg = shift; + my $query = shift; + my $retval = 0; + my $sel_string = $sel->[3]; + + if (ref ($agg)) + { + print STDERR $/, "Bug: ", join (', ', caller ()); + } + + if (!defined ($DATA->{$sel_string}{$agg})) + { + print STDERR $/, __FILE__, ": Unavailable aggregation requested: ``$agg''. Returning 0."; + + if ($::DEBUG) + { + my $dump = Data::Dumper->Dump ([$sel, $query], [qw#$sel $query#]); + my $file = __FILE__ . 
': '; + $dump =~ s/^/$file/gm; + $dump =~ s/[\n\r]+$//s; + print STDERR $/, $dump; + } + + return (0); + } + + my $ptr = $DATA->{$sel_string}{$agg}; + + if ($::DEBUG & 0x80) + { + my $dump = Data::Dumper->Dump ([$query], ['$query']); + my $tmp = __FILE__ . ': '; + $dump =~ s/^/$tmp/gm; + $dump =~ s/[\n\r]+$//g; + print STDERR $/, $dump; + } + + for (@{$sel->[1]}) + { + my $fld = $_; + if (defined ($query->{$fld})) + { + if (defined ($ptr->{$query->{$fld}})) + { + $ptr = $ptr->{$query->{$fld}}; + } + else + { + print STDERR $/, __FILE__, ': Unavailable field requested. Returning 0.' + if ($::DEBUG & 0x10); + return (0); + } + } + else + { + my $sum = 0; + my @val = keys (%{$VALUES_PER_FIELD->{$sel_string}{$fld}}); + print STDERR $/, __FILE__, ': Query not unique. Performing subqueries for ', + scalar (@val), " values of field '$fld'." if ($::DEBUG & 0x10); + for (@val) + { + my $val = $_; + my %new_query = %$query; + $new_query{$fld} = $val; + $sum += receive ($sel, $agg, \%new_query); + } + print $/, __FILE__, ": Returning, \$sum = $sum" if ($::DEBUG & 0x10); + return ($sum); + } + } + print $/, __FILE__, ": Returning, \$\$ptr = $$ptr" if ($::DEBUG & 0x10); + return ($$ptr); +} + +=head2 Yaala::Data::Core:store (\%data) + +Saves data in the internal structure. + +=cut +sub store +{ + my $data = shift; + + delete_fields ($data); + + if ($::DEBUG & 0x80) + { + my $dump = Data::Dumper->Dump ([$data, $DATA], [qw#$data $DATA#]); + my $file = __FILE__ . 
': '; + $dump =~ s/^/$file/gm; + $dump =~ s/[\n\r]+$//s; + print STDERR $/, $dump; + } + + for (@$SELECTS) + { + my $sel = $_; + my $agg = $sel->[0]; + my $sel_string = $sel->[3]; + my $ptr; + my $total_fields = 0; + my $i = 0; + + if (check_where_clauses ($sel, $data)) + { + next; + } + + for (@{$sel->[0]}) + { + my $agg = $_; + + if (!defined $DATA->{$sel_string}{$agg}) { $DATA->{$sel_string}{$agg} = {}; } + my $ptr = $DATA->{$sel_string}{$agg}; + + print STDERR $/, __FILE__, ": \$DATA->{$sel_string}{$agg}" if ($::DEBUG & 0x10); + + $total_fields = scalar (@{$sel->[1]}); + for ($i = 0; $i < $total_fields; $i++) + { + my $fld = $sel->[1][$i]; + + my $field_value = convert ($fld, $data->{$fld}); + print STDERR '{', $field_value, '}' if ($::DEBUG & 0x10); + + if (!defined ($ptr->{$field_value})) + { + if ($i == ($total_fields - 1)) + { + my $tmp = 0; + $ptr->{$field_value} = \$tmp; + } + else + { + $ptr->{$field_value} = {}; + } + } + + $ptr = $ptr->{$field_value}; + + $VALUES_PER_FIELD->{$sel_string}{$fld}{$field_value}++; + } + print STDERR " += ", $data->{$agg} if ($::DEBUG & 0x10); + + if (!defined ($$ptr) or !defined ($data->{$agg})) + { + print STDERR $/, __FILE__, ': ', + Data::Dumper->Dump ([$sel, $data], [qw/sel data/]); + } + + $$ptr += $data->{$agg}; + } + } +} + +sub get_values +{ + my $sel = shift; + my $sel_string = $sel->[3]; + my $field = shift; + + if (!defined ($VALUES_PER_FIELD->{$sel_string})) + { + print STDERR $/, __FILE__, ': selection not defined in $VALUES_PER_FIELD.' 
if ($::DEBUG); + return (); + } + + my @vals = keys (%{$VALUES_PER_FIELD->{$sel_string}{$field}}); + + return (@vals); +} + +sub check_where_clauses +# true == reject +# false == accept +{ + my $sel = shift; + my $data = shift; + + for (@{$sel->[2]}) + { + my $where = $_; + my ($key, $op, $val) = @$where; + my $data_val; + + if (!defined ($data->{$key}) and + ($op ne '!=' and + $op ne '!~' and + $op ne '<=' and + $op ne '<')) + { + print STDERR $/, __FILE__, ": \$data->{$key} not defined." if ($::DEBUG); + return (1); + } + elsif (!defined ($data->{$key}) and + ($op eq '!=' or + $op eq '!~' or + $op eq '<=' or + $op eq '<')) + { + next; + } + + $data_val = $data->{$key}; + + if ($op eq '=~') + { + if ($data_val =~ qr/$val/) + { + next; + } + else + { + return (1); + } + } + elsif ($op eq '!~') + { + if ($data_val !~ qr/$val/) + { + next; + } + else + { + return (1); + } + } + else + { + my $retval = 0; + my $eval = qq#if (\$data_val $op \$val) { \$retval = 0; } else { \$retval = 1; }#; + eval "$eval"; + die ('eval: ' . $@) if ($@); + + return (1) if ($retval); + } + } + + return (0); +} diff --git a/lib/Yaala/Data/Persistent.pm b/lib/Yaala/Data/Persistent.pm new file mode 100644 index 0000000..f9f2f1a --- /dev/null +++ b/lib/Yaala/Data/Persistent.pm @@ -0,0 +1,226 @@ +package Yaala::Data::Persistent; + +use strict; +use warnings; + +=head1 Yaala::Data::Persistent + +Saves datastructures to disk and retrieves them again. This allows data +to exist for longer than just one run. 
+ +=cut + +use Yaala::Config qw#get_config get_checksum#; + +@Yaala::Data::Persistent::EXPORT_OK = qw#init#; +@Yaala::Data::Persistent::ISA = ('Exporter'); + +our $HAVE_STORABLE = 0; +our $WANT_PERSISTENCY = 1; +our $DATA_STRUCTURE = {}; +our $FILENAME = 'persistency.data'; + +my $VERSION = '$Id: Persistent.pm,v 1.5 2004/11/07 11:15:28 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +eval "use Storable qw#store retrieve#;"; +if (!$@) +{ + $HAVE_STORABLE = 1; + print STDERR ' - Storable is installed' if ($::DEBUG); +} +else +{ + print STDERR ' - Storable is NOT installed' if ($::DEBUG); +} + +=head1 Configuration options + +=head2 use_persistency + +If set to false persistency will not be used, even if the required +module ``Storable'' is installed. + +If unset it defaults to automatic detection of the ``Storable'' module +and uses persistency if possible. + +=cut + +if (get_config ('use_persistency')) +{ + my $want = lc (get_config ('use_persistency')); + if ($want eq 'no' or $want eq 'false' or $want eq 'off') + { + $WANT_PERSISTENCY = 0; + } + elsif ($want eq 'yes' or $want eq 'true' or $want eq 'on') + { + if (!$HAVE_STORABLE) + { + print STDERR $/, __FILE__, ": You've set ``use_persistency'' to ``$want''.", + $/, __FILE__, " For this to work you need to have the perl module ``Storable'' installed.", + $/, __FILE__, ' Please go to your nearest CPAN-mirror and install it first.', + $/, __FILE__, ' This config-option will be ignored.'; + } + } + elsif ($want eq 'auto' or $want eq 'automatic') + { + # do nothing.. Already been done. + } + else + { + print STDERR $/, __FILE__, ": You've set ``use_persistency'' to ``$want''.", + $/, __FILE__, ' This value is not understood and is being ignored.'; + } +} + +=head2 persistency_file + +Sets the file to store persistency data in. 
Defaults to +``persistency.data'' + +=cut + +if (get_config ('persistency_file')) +{ + $FILENAME = get_config ('persistency_file'); +} + +if ($HAVE_STORABLE and $WANT_PERSISTENCY and -e $FILENAME) +{ + $DATA_STRUCTURE = retrieve ($FILENAME); + + my $checksum = get_checksum (); + print STDERR $/, __FILE__, ": Config-checksum is ``$checksum''" if ($::DEBUG & 0x200); + + if (!defined ($DATA_STRUCTURE)) + { + print STDERR $/, __FILE__, ": Persistent data could not be loaded.", + $/, __FILE__, "``$FILENAME'' will be overwritten when the program exits."; + $DATA_STRUCTURE = {'*CHECKSUM*' => $checksum}; + } + else + { + if (!defined ($DATA_STRUCTURE->{'*CHECKSUM*'}) + or ($DATA_STRUCTURE->{'*CHECKSUM*'} ne $checksum)) + { + print STDERR $/, __FILE__, ": Persistent data could be read, but checksums didn't match.", + $/, __FILE__, ": The data will not be used and the file overwritten." if ($::DEBUG); + + if ($::DEBUG & 200) + { + if (!defined ($DATA_STRUCTURE->{'*CHECKSUM*'})) + { + print STDERR $/, __FILE__, ": \$DATA_STRUCTURE->{'*CHECKSUM*'} isn't defined."; + } + else + { + my $tmp = $DATA_STRUCTURE->{'*CHECKSUM*'}; + print STDERR $/, __FILE__, ": ``$tmp'' ne ``$checksum''"; + } + } + + $DATA_STRUCTURE = {'*CHECKSUM*' => $checksum}; + } + } +} +elsif ($HAVE_STORABLE and $WANT_PERSISTENCY and !-e $FILENAME) +{ + my $checksum = get_checksum (); + print STDERR $/, __FILE__, ": Config-checksum is ``$checksum''" if ($::DEBUG & 0x200); + + $DATA_STRUCTURE = {'*CHECKSUM*' => $checksum}; +} + +return (1); + +sub data_save +{ + if (!$HAVE_STORABLE) { return (undef); } + + my $pkg = caller; + my $name = shift; + my $ptr = shift; + + $DATA_STRUCTURE->{$pkg}{$name} = $ptr; +} + +sub data_load +{ + if (!$HAVE_STORABLE) { return (undef); } + + my $pkg = caller; + my $name = shift; + my $ptr; # = undef; + + if (defined ($DATA_STRUCTURE->{$pkg}{$name})) + { + $ptr = $DATA_STRUCTURE->{$pkg}{$name}; + } + + return ($ptr); +} + +=head1 Exported routines + +=head2 init ($name, $type) + 
+Initializes a variable in the persistency-namespace which is saved +automatically upon termination. + +The type is needed for proper initialization when the persistency-file +could not be read. Valid variable types are ``scalar'', ``hash'' and +``array''. + +The name must be unique for each package so the module can identify which +variable is requested. + +=cut + +sub init +{ + my $pkg = caller; + my $name = shift; + my $type = shift; + my $ptr; + + if (defined ($DATA_STRUCTURE->{$pkg}{$name})) + { + $ptr = $DATA_STRUCTURE->{$pkg}{$name}; + } + else + { + if ($type eq 'scalar') + { + my $tmp = ''; + $ptr = \$tmp; + } + elsif ($type eq 'hash') + { + my %tmp = (); + $ptr = \%tmp; + } + elsif ($type eq 'array') + { + my @tmp = (); + $ptr = \@tmp; + } + else + { + die; + } + + $DATA_STRUCTURE->{$pkg}{$name} = $ptr; + } + + return ($ptr); +} + +END +{ + if ($HAVE_STORABLE and $WANT_PERSISTENCY) + { + print STDERR $/, __FILE__, ": Writing persistent data to ``$FILENAME''" if ($::DEBUG); + store ($DATA_STRUCTURE, $FILENAME); + } +} diff --git a/lib/Yaala/Data/Setup.pm b/lib/Yaala/Data/Setup.pm new file mode 100644 index 0000000..3234812 --- /dev/null +++ b/lib/Yaala/Data/Setup.pm @@ -0,0 +1,241 @@ +package Yaala::Data::Setup; + +use strict; +use warnings; +use vars qw#$USED_FIELDS $USED_AGGREGATIONS $SELECTS %DATAFIELDS#; + +=head1 Yaala::Data::Setup + +This module is currently under construction. 
+ +=cut + +use Exporter; +use Carp qw#carp cluck croak confess#; +use Yaala::Config qw#get_config#; +use Yaala::Data::Persistent qw#init#; + +import Yaala::Parser qw#%DATAFIELDS#; + +@Yaala::Data::Setup::ISA = ('Exporter'); +@Yaala::Data::Setup::EXPORT_OK = qw#$USED_FIELDS $USED_AGGREGATIONS $SELECTS %DATAFIELDS#; +import Yaala::Parser qw#%DATAFIELDS#; + +$USED_FIELDS = init ('$USED_FIELDS', 'hash'); +$USED_AGGREGATIONS = init ('$USED_AGGREGATIONS', 'hash'); +$SELECTS = init ('$SELECTS', 'array'); + +my $VERSION = '$Id: Setup.pm,v 1.14 2003/12/07 14:52:22 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +if ($::DEBUG & 0x20) +{ + require Data::Dumper; + import Data::Dumper qw#Dumper#; +} + +read_config (); + +return (1); + +=head1 Routines + +=head2 Yaala::Data::Setup::read_config + +Parses the select-statements in the config file and returns configuration +data. To be called by Yaala::Data::Core. + +=cut +sub read_config +{ + print STDERR $/, __FILE__, ': ', + Data::Dumper->Dump ([\%DATAFIELDS], ['DATAFIELDS']) if ($::DEBUG & 0x20); + + unless (get_config ('select')) + { + print STDERR $/, __FILE__, ": Please edit the config file first!\n"; + exit (1); + } + + for (get_config ('select')) + { + print STDERR $/, __FILE__, ": Select statement from config file: '$_'" if ($::DEBUG & 0x20); + my $select = parse_select ($_); + + next unless (defined ($select)); + + push (@$SELECTS, $select); + + $USED_AGGREGATIONS->{$_}++ for (@{$select->[0]}); + $USED_FIELDS->{$_}++ for (@{$select->[1]}); + $USED_FIELDS->{$_->[0]}++ for (@{$select->[2]}); + + print STDERR $/, __FILE__, ': New selection: ', + Data::Dumper->Dump ([$select], ['select']) if ($::DEBUG & 0x20); + } + + if (!scalar (@$SELECTS)) + { + print STDERR $/, __FILE__, ": No valid select-statements found. 
Exiting.\n"; + exit (1); + } +} + +# select: agg from fld1 [, fld2] [where fld3 = "value" ]; +# select: bytes from date [, time] [where client = "leeloo.ff"]; +sub parse_select +{ + my $line = shift; + my $retval; + + $line =~ s/\s\s+/ /g; + + if (grep { $line eq $_->[3] } (@$SELECTS)) + { + print STDERR $/, __FILE__, ": Found duplicated selection ``$line''.", + $/, __FILE__, ": This is probably coming from Yaala::Data::Persistent and is nothing to worry about." + if ($::DEBUG); + + return (undef); + } + + #if ($line =~ m/^(\w+) BY (\w+(?:,\s?\w+)*)(?: WHERE (.+))?$/i) + if ($line =~ m/^(\w+(?:\s*,\s*\w+)*)\s+BY\s+(\w+(?:\s*,\s*\w+)*)(?:\s+WHERE\s+(.+))?$/i) + { + my ($agg_exp, $fld_exp, $where_exp) = ($1, $2, $3); + + my @aggs = (); + for (split (m/\s*,\s*/, $agg_exp)) + { + my $agg = lc ($_); + + if (!defined ($DATAFIELDS{$agg})) + { + print STDERR $/, __FILE__, ": Aggregation ``$agg'' not provided by parser. ", + "Ignoring this aggregation."; + next; + } + elsif ($DATAFIELDS{$agg} !~ m/^agg/i) + { + print STDERR $/, __FILE__, ": ``$agg'' is not an aggregation. Ignoring it."; + next; + } + + push (@aggs, $agg); + } + if (!scalar (@aggs)) + { + print STDERR $/, __FILE__, ": No valid aggregation found. Ignoring this select-statement."; + return (undef); + } + + my @fields = (); + for (split (m/\s*,\s*/, $fld_exp)) + { + my $fld = lc ($_); + + if (!defined ($DATAFIELDS{$fld})) + { + print STDERR $/, __FILE__, ": Field '$fld' not provided by parser. Ignoring it."; + next; + } + + push (@fields, $fld); + } + if (!scalar (@fields)) + { + print STDERR $/, __FILE__, ": No valid fields found. Ignoring this select-statement."; + return (undef); + } + + my @wheres = parse_where ($where_exp); + + $retval = [\@aggs, \@fields, \@wheres, $line]; + } + else + { + print STDERR $/, __FILE__, ": Unable to parse select statement:", + $/, __FILE__, ": $line", + $/, __FILE__, ": Ignoring it."; + } + + return ($retval); +} + +# where ... 
+# key = "val" +# key =~ "regex" +# key < val +# key > val +# key == val +sub parse_where +{ + my $where_exp = shift; + my @where = (); + + if (!defined ($where_exp)) + { + return (@where); + } + + for (split (m/\s?,\s?/, $where_exp)) + { + my $exp = $_; + if ($exp =~ m/(\w+)\s?([<>=~!]+)\s?(.+)/) + { + my ($fld, $op, $val) = ($1, $2, $3); + if (!defined ($DATAFIELDS{$fld})) + { + print STDERR $/, __FILE__, ": Error in where-clause: Field '$fld' ", + "is unknown. Ignoring it."; + next; + } + + my $type = ''; + if ($DATAFIELDS{$fld} =~ m/:/) + { + $type = (split (m/:/, $DATAFIELDS{$fld}))[1]; + } + + unless ($op =~ m/^[<>=!]=$/ + or $op eq '=~' + or $op eq '!~' + or $op eq '<' or $op eq '>') + { + print STDERR $/, __FILE__, ": Error in where-clause: Operator '$op' ", + "is unknown. Ignoring it."; + next; + } + + $val =~ s/^['"]|['"]$//g; + + if ($type ne 'numeric') + { + $op = 'eq' if ($op eq '=='); + $op = 'ne' if ($op eq '!='); + $op = 'gt' if ($op eq '>'); + $op = 'ge' if ($op eq '>='); + $op = 'lt' if ($op eq '<'); + $op = 'le' if ($op eq '<='); + } + elsif ($type eq 'numeric' and + ($op eq '=~' or $op eq '!~')) + { + print STDERR $/, __FILE__, ": Error in where clause: Can't use regex ", + "with numeric field $fld. Ignoring this clause."; + next; + } + + print STDERR $/, __FILE__, ": New where-statement: [$fld, $op, $val]" if ($::DEBUG & 0x20); + + push (@where, [$fld, $op, $val]); + } + else + { + print STDERR $/, __FILE__, ": Error in where-clause: Unable to parse '$exp'. 
", + "Ignoring it."; + } + } + + return (@where); +} diff --git a/lib/Yaala/Html.pm b/lib/Yaala/Html.pm new file mode 100644 index 0000000..3e0512e --- /dev/null +++ b/lib/Yaala/Html.pm @@ -0,0 +1,203 @@ +package Yaala::Html; + +use strict; +use warnings; + +use Exporter; +use Yaala::Config qw#get_config#; +use Yaala::Data::Setup qw#$SELECTS#; + +@Yaala::Html::EXPORT_OK = qw(escape head foot navbar get_filename get_title); +@Yaala::Html::ISA = ('Exporter'); + +=head1 Html.pm + +A set of utilities used by report modules. + +=cut + +my $VERSION = '$Id: Html.pm,v 1.8 2003/12/07 14:52:02 octo Exp octo $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +# NB: preserves all &foo; to allow inclusion of strange characters +# returns list +sub escape +{ + my @esc = map + { + s//>/g; + s/"/"/g; + s/\s{2,}/ /g; + $_; + } (@_); + + if (wantarray ()) + { + return (@esc); + } + else + { + return (join ('', @esc)); + } +} + +# generates only common header - with title and head. +sub head +{ + my ($title, $header) = @_; + my $text; + my $charset = get_config ('html_charset'); + my $stylesheet = get_config ('html_stylesheet'); + + if (!defined ($charset) or !$charset) { $charset = 'iso-8859-1'; } + if (!defined ($stylesheet) or !$stylesheet) { $stylesheet = 'style.css'; } + + $text = qq#\n# + . qq#\n# + . qq#\n\n#; + + if ($stylesheet) + { + $text .= qq# \n#; + } + + $text .= " $title\n" + . qq# \n# + . "\n\n" + . "\n"; + + $text .= qq#

$header

\n# if $header; + return $text; +} + +sub foot +{ + my ($a, $e); + my $text = "
\n" + . qq#\n"; + + $text .= "\n\n\n"; + return $text; +} + +sub navbar +{ + my $sel = shift; + my $text = qq#\n\n"; + return ($text); +} + +sub get_filename +{ + my $sel = shift; + + my $aggs = join ('-', @{$sel->[0]}); + my $flds = join ('-', @{$sel->[1]}); + + my $filename = $aggs . '_BY_' . $flds; + + my %sign_names = + ( + '==' => 'eq', + 'eq' => 'eq', + '>=' => 'ge', + '<=' => 'le', + '!=' => 'ne', + '=~' => 're', + '!~' => 'nre', + '<' => 'lt', + '>' => 'gt' + ); + + if (scalar (@{$sel->[2]})) + { + my @where = (); + for (@{$sel->[2]}) + { + my ($key, $op, $val) = @$_; + $val =~ s/\W//g; + + $op = $sign_names{$op} if (defined ($sign_names{$op})); + push (@where, join ('-', ($key, $op, $val))); + } + + $filename .= '_WHERE_' . join ('_AND_', @where); + } + + $filename .= '.html'; + + return ($filename); +} + +sub get_title +{ + my $sel = shift; + + my @aggs = map { ucfirst ($_) } (@{$sel->[0]}); + my @flds = map { ucfirst ($_) } (@{$sel->[1]}); + + my $title = my_join (@aggs) . ' by ' . my_join (@flds); + + if (scalar (@{$sel->[2]})) + { + $title .= ' where '; + my @wheres = map + { + ucfirst ($_->[0]) . ' ' + . $_->[1] + . ' "' . $_->[2] . '"' + } (@{$sel->[2]}); + + $title .= my_join (@wheres); + } + + ($title) = escape ($title); + return ($title); +} + +sub my_join +{ + my @all = @_; + my $last = pop (@all); + + return ($last) unless (@all); + + my $retval = join (', ', @all) . 
" and $last"; + + return ($retval); +} diff --git a/lib/Yaala/Parser/Bind9.pm b/lib/Yaala/Parser/Bind9.pm new file mode 100644 index 0000000..d1d4bda --- /dev/null +++ b/lib/Yaala/Parser/Bind9.pm @@ -0,0 +1,157 @@ +package Yaala::Parser; + +# Written by David Augros + +use strict; +use warnings; +use vars qw(%DATAFIELDS); + +use Exporter; +use Yaala::Parser::WebserverTools qw#%MONTH_NUMBERS#; +use Yaala::Data::Persistent qw#init#; + +@Yaala::Parser::EXPORT_OK = qw#%DATAFIELDS parse extra#; +@Yaala::Parser::ISA = ('Exporter'); + +our $LASTDATE = init ('$LASTDATE', 'scalar'); +our $EXTRA = init ('$EXTRA', 'hash'); + +if (!$$LASTDATE) { $$LASTDATE = 0; } +if (!defined ($EXTRA->{'total'})) { $EXTRA->{'total'} = 0; } +if (!defined ($EXTRA->{'days'} )) { $EXTRA->{'days'} = {}; } + +our %severity = map + { $_ => 1 } + (qw#kern user mail daemon auth syslog lpr + news uucp cron authpriv ftp + local0 local1 local2 local3 + local4 local5 local6 local7#); + +%DATAFIELDS = ( + date => 'key:date', + hour => 'key:hour', + client => 'key:host', + + query => 'key', + class => 'key', + type => 'key', + + severity => 'key', + category => 'key', + + requests => 'agg' +); + +require Yaala::Data::Core; +import Yaala::Data::Core qw#store#; + +my ($default_second, $default_minute, $default_hour, $default_day, $default_year) = (localtime ())[0,1,2,3,5]; +my $default_month = (split (m/\s+/, scalar (localtime ())))[1]; +$default_year += 1900; + +my $VERSION = '$Id: Bind9.pm,v 1.4 2003/12/07 15:01:33 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +sub parse +{ + my $line = shift or return undef; + #if ($line =~ m/^(?:(\w{3}) (\d+) (\d\d)[\d:\.]+ )?(?:(\w+): )?(?:(\w+): )?client ([\d\.])#\d+: query: (\S+) (\S+) (\S+)$/) + if ($line =~ m/^(?:(\w{3}) (\d+) (\d\d):(\d\d):(\d\d)\.(\d\d\d) )?(?:(\w+): )?(?:(\w+): )?client ([\d\.]+)#\d+: query: (\S+) (\S+) (\S+)$/) + { + my ($client, $query, $class, $type) = ($9, $10, $11, $12); + + my ($month, $day, $hour, 
$minute, $second, $frac) = + ( + $default_month, $default_day, $default_hour, + $default_minute, $default_second, '000' + ); + + if (defined ($1) and $1) + { + ($month, $day, $hour, $minute, $second, $frac) = ($MONTH_NUMBERS{$1}, + $2, $3, $4, $5, $6); + + print STDERR $/, __FILE__, ": $1" if (!$month); + + my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u%03u", + $default_year, $month, $day, $hour, + $minute, $second, $frac)); + + if ($tmp < $$LASTDATE) + { + print STDERR $/, __FILE__, ": Skipping.. ($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200); + return (undef); + } + else { $$LASTDATE = $tmp; } + } + + my $date = sprintf ("%04u-%02u-%02u", + $default_year, $month, $day); + + my $category = '*UNKNOWN*'; + my $severity = '*UNKNOWN*'; + if (defined ($7) and $7 and defined ($8) and $8) + { + $category = $7; + $severity = $8; + } + elsif (defined ($7) and $7) + { + if (defined ($severity{$7})) { $severity = $7; } + else { $category = $7; } + } + elsif (defined ($8) and $8) + { + if (defined ($severity{$8})) { $severity = $8; } + else { $category = $8; } + } + + if ($query =~ m/in-addr\.arpa$/) + { + my @tmp = reverse (split (m/\./, $query)); + splice (@tmp, 0, 2); + + $query = join ('.', @tmp); + } + + $EXTRA->{'total'}++; + $EXTRA->{'days'}{$date}++; + + my %combined = ( + date => $date, + hour => $hour, + client => $client, + + query => $query, + class => $class, + type => $type, + + severity => $severity, + category => $category, + + requests => 1 + ); + + store (\%combined); + } + elsif ($::DEBUG) + { + chomp ($line); + print $/, __FILE__, ": Unable to parse: $line"; + } +} + +sub extra +{ + my ($average, $days) = (0, 1); + + return (0) unless ($EXTRA->{'total'}); + + $days = scalar (keys (%{$EXTRA->{'days'}})); + + $::EXTRA->{'Total requests'} = $EXTRA->{'total'}; + $::EXTRA->{'Average requests per day'} = sprintf ("%.1f", $EXTRA->{'total'} / $days);; + $::EXTRA->{'Reporting period'} = "$days days"; +} diff --git a/lib/Yaala/Parser/Common.pm 
b/lib/Yaala/Parser/Common.pm new file mode 100644 index 0000000..f99320f --- /dev/null +++ b/lib/Yaala/Parser/Common.pm @@ -0,0 +1,137 @@ +package Yaala::Parser; + +use strict; +use warnings; +use vars qw(%DATAFIELDS); + +use Exporter; +use Yaala::Parser::WebserverTools qw(%MONTH_NUMBERS); +use Yaala::Data::Persistent qw#init#; +use Yaala::Config qw#get_config#; + +@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS); +@Yaala::Parser::ISA = ('Exporter'); + +our $LASTDATE = init ('$LASTDATE', 'scalar'); +our $EXTRA = init ('$EXTRA', 'hash'); + +if (!$$LASTDATE) { $$LASTDATE = 0; } +if (!defined ($EXTRA->{'total'})) { $EXTRA->{'total'} = 0; } +if (!defined ($EXTRA->{'days'} )) { $EXTRA->{'days'} = {}; } + +%DATAFIELDS = ( + host => 'key:host', + user => 'key', + date => 'key:date', + hour => 'key:hour', + tld => 'key', + file => 'key', + status => 'key:numeric', + bytes => 'agg:bytes', + requests => 'agg' +); + +# This needs to be done at runtime, since Data uses Setup which relies on +# %DATAFIELDS to be defined -octo +require Yaala::Data::Core; +import Yaala::Data::Core qw#store#; + +my $VERSION = '$Id: Common.pm,v 1.14 2003/12/07 14:56:38 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +sub parse +{ + my $line = shift or return undef; + if ($line =~ /^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\d+|-)$/) + { + my ($host, $ident, $user, $date, $request, $status, $bytes) = + ($1, $2, $3, $4, $5, $6, $7); + + my ($day, $month, $year, $hour, $minute, $second) = + $date =~ m#(\d\d)/(\w{3})/(\d{4}):(\d\d):(\d\d):(\d\d)#; + + $month = $MONTH_NUMBERS{$month}; + $date = sprintf("%04u-%02u-%02u", $year, $month, $day); + + { + my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u", + $year, $month, $day, $hour, $minute, $second)); + + if ($tmp < $$LASTDATE) + { + print STDERR $/, __FILE__, ": Skipping.. 
($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200); + return (undef); + } + else { $$LASTDATE = $tmp; } + } + + my ($method, $file, $params); + if ($request =~ m#(\S+) ([^ \?]+)\??(\S*)#) + { + $method = $1; + $file = $2; + $params = (defined ($3) ? $3 : ''); + } + else + { + print STDERR $/, __FILE__, ": Malformed request: ``$request''." if ($::DEBUG); + return (0); + } + + if (($user ne '-') and ($status >= 400) and ($status < 500)) + { + $user = '*INVALID*'; + } + + if ($user eq '-') { $user = '*UNKNOWN*'; } + if ($bytes eq '-') { $bytes = 0; } + + my $tld; + if ($host =~ m/\.([a-z]{2,})$/i) + { + $tld = lc ($1); + } + else + { + $tld = '*UNRESOLVED*'; + } + + $EXTRA->{'total'}++; + $EXTRA->{'days'}{$date}++; + + my %combined = ( + 'host' => $host, + 'user' => $user, + 'date' => $date, + 'hour' => $hour, + 'tld' => $tld, + 'file' => $file, + 'status' => $status, + 'bytes' => $bytes, + 'requests' => 1 + ); + store (\%combined); + } + elsif ($::DEBUG) + { + chomp ($line); + print STDERR $/, __FILE__, ": Unable to parse: '$line'"; + } +} + +sub extra +{ + my ($average, $days) = (0, 0); + + $days = scalar (keys (%{$EXTRA->{'days'}})); + + return (0) unless ($days); + + $average = sprintf ("%.1f", ($EXTRA->{'total'} / $days)); + + $::EXTRA->{'Total requests'} = $EXTRA->{'total'}; + $::EXTRA->{'Average requests per day'} = $average; + $::EXTRA->{'Reporting period'} = "$days days"; +} diff --git a/lib/Yaala/Parser/Iptables.pm b/lib/Yaala/Parser/Iptables.pm new file mode 100644 index 0000000..d1ac70b --- /dev/null +++ b/lib/Yaala/Parser/Iptables.pm @@ -0,0 +1,283 @@ +package Yaala::Parser; + +use strict; +use warnings; +use vars qw#%DATAFIELDS#; + +use Exporter; +use Yaala::Data::Persistent qw#init#; +use Yaala::Parser::WebserverTools qw#%MONTH_NUMBERS#; + +@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS); +@Yaala::Parser::ISA = ('Exporter'); + +our $LASTDATE = init ('$LASTDATE', 'scalar'); + +if (!$$LASTDATE) { $$LASTDATE = 0; } + +%DATAFIELDS = +( + date => 
'key:date', + hour => 'key:hour', + + source_ip => 'key', + dest_ip => 'key', + + incoming_dev => 'key', + outgoing_dev => 'key', + + protocol => 'key', + + source_port => 'key:numeric', + destination_port => 'key:numeric', + + icmp_type => 'key:numeric', + + mac_address => 'key', + + tos => 'key', + prec => 'key', + ttl => 'key:numeric', + + packets => 'agg', + size => 'agg', + payload => 'agg' +); + + +# This needs to be done at runtime, since Data uses Setup which relies on +# %DATAFIELDS to be defined -octo +require Yaala::Data::Core; +import Yaala::Data::Core qw#store#; + +my $VERSION = '$Id: Iptables.pm,v 1.4 2003/12/07 15:21:02 octo Exp octo $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +sub parse +{ + my $line = shift; + + if ($line =~ m/IN=(\S*) OUT=(\S*) /) + { + my $in = $1 ? $1 : '*unknown*'; + my $out = $2 ? $2 : '*unknown*'; + my $rest = $'; + + my $mac = '*unknown*'; + my $src, $dst; + my $len, $tos, $prec, $ttl, $id; + + my $ip_flags = ''; + my $frag = 0; + my $opt; + my $proto_name; + my $proto_type = 'N/A'; + + my $dport = 'N/A'; + my $sport = 'N/A'; + + if ($rest =~ m/^SRC=([\d\.]+) DST=([\d\.]+) LEN=(\d+) TOS=(0x\S\S) PREC=(0x\S\S) TTL=(\d+) ID=(\d+) /) + { + $src = $1; + $dst = $2; + $len = $3; + $tos = unpack ("%02h", $4); + $prec = $5; + $ttl = $6; + $id = $7; + $rest = $'; + + # PPPTTTTM + # ^^^^ + # 00011110 + # 1 E + $tos &= 0x1E; + if ($tos == 0x00) + { + $tos = 'Normal'; + } + elsif ($tos == 0x10) + { + $tos = 'Minimize Delay'; + } + elsif ($tos == 0x08) + { + $tos = 'Maximize Throughput'; + } + elsif ($tos == 0x04) + { + $tos = 'Maximize Reliability'; + } + else + { + $tos = sprintf ("Unknown (%02x)", $tos); + } + } + else + { + return (0); + } + + if ($rest =~ m/^((?:CE )?(?:DF )?(?:MF )?) (?:FRAG:(\d+) )?(?:OPT \(([0-9A-F]+)\) )?PROTO=(\S+) /) + { + $ip_flags = $1; + $frag = defined ($2) ? $2 : 0; + $opt = defined ($3) ? 
$3 : 'none'; + $proto_name = $4; + $rest = $'; + } + else + { + return (0); + } + + if (($proto eq 'TCP') or ($proto eq 'UDP')) + { + if ($rest =~ m/SPT=(\d+) DPT=(\d+) /) + { + $sport = $1; + $dport = $2; + } + } + + if ($proto eq 'TCP') + { + if ($rest =~ m/RES=0x\S\S ((?:CWR )?(?:ECE )?(?:URG )?(?:ACK )?(?:PSH )?(?:RST )?(?:SYN )?(?:FIN )?)/) + { + my $temp = $1; + $temp =~ s/ $//; + $proto_type = $temp ? $temp : '*none*'; + } + } + elsif ($proto eq 'ICMP') + { + my $type = -1; + + if ($rest =~ m/TYPE=(\d+) /) + { + $type = $1; + } + + if ($type == 0) { $proto_type = 'Echo Reply'; } + elsif ($type == 3) { $proto_type = 'Destination Unreachable'; } + elsif ($type == 4) { $proto_type = 'Source Quench'; } + elsif ($type == 5) { $proto_type = 'Redirect'; } + elsif ($type == 8) { $proto_type = 'Echo Request'; } + elsif ($type == 11) { $proto_type = 'Time Exceeded'; } + elsif ($type == 12) { $proto_type = 'Parameter Problem'; } + elsif ($type == 13) { $proto_type = 'Timestamp Request'; } + elsif ($type == 14) { $proto_type = 'Timestamp Reply'; } + elsif ($type == 15) { $proto_type = 'Information Request'; } + elsif ($type == 16) { $proto_type = 'Information Reply'; } + elsif ($type == 17) { $proto_type = 'Address Mask Request'; } + elsif ($type == 18) { $proto_type = 'Address Mask Reply'; } + else { $proto_type = "Unknown type ($type)"; } + } + + + + + + + + if ($line =~ m/IN=\S* OUT=/) + { + my ($month, $day, $hour, $minute, $second) = $line =~ m/^(\w{3}) (\d+) (\d\d):(\d\d):(\d\d)/; + my $year = (localtime ())[5] + 1900; + $month = $MONTH_NUMBERS{$month}; + + { + my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u", + $year, $month, $day, $hour, $minute, $second)); + + if ($tmp < $$LASTDATE) + { + print STDERR $/, __FILE__, ": Skipping.. 
($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200); + return (undef); + } + else { $$LASTDATE = $tmp; } + } + + my $date = sprintf ("%04u-%02u-%02u", $year, $month, $day); + + my %packet = (); + while ($line =~ m/([A-Z]+)=(\S+)/g) + { + my $key = lc ($1); + my $val = $2; + + if ($key eq 'len') + { + if (defined ($packet{'size'})) + { + $packet{'payload'} = $val; + } + else + { + $packet{'size'} = $val; + } + } + else + { + $packet{$key} = $val; + } + } + + my %data = + ( + date => $date, + hour => $hour, + + source_ip => 'n/a', + dest_ip => 'n/a', + + incoming_dev => '*none*', + outgoing_dev => '*none*', + + protocol => '*unknown*', + + source_port => 0, + destination_port => 0, + icmp_type => 0, + + mac_address => '*unknown*', + + tos => '0x00', + prec => '0x00', + ttl => 0, + + packets => 1, + size => 0, + payload => 0 + ); + + $data{'source_ip'} = $packet{'src'} if (defined ($packet{'src'})); + $data{'dest_ip'} = $packet{'dst'} if (defined ($packet{'dst'})); + + $data{'incoming_dev'} = $packet{'in'} if (defined ($packet{'in'})); + $data{'outgoing_dev'} = $packet{'out'} if (defined ($packet{'out'})); + + $data{'protocol'} = $packet{'proto'} if (defined ($packet{'proto'})); + + $data{'source_port'} = $packet{'spt'} if (defined ($packet{'spt'})); + $data{'destination_port'} = $packet{'dpt'} if (defined ($packet{'dpt'})); + $data{'icmp_type'} = $packet{'type'} if (defined ($packet{'type'})); + + $data{'mac_address'} = $packet{'mac'} if (defined ($packet{'mac'})); + + $data{'tos'} = $packet{'tos'} if (defined ($packet{'tos'})); + $data{'prec'} = $packet{'prec'} if (defined ($packet{'prec'})); + $data{'ttl'} = $packet{'ttl'} if (defined ($packet{'ttl'})); + + $data{'size'} = $packet{'size'} if (defined ($packet{'size'})); + $data{'payload'} = $packet{'payload'} if (defined ($packet{'payload'})); + + store (\%data); + } +} + +sub extra +{ +} diff --git a/lib/Yaala/Parser/Ncsa.pm b/lib/Yaala/Parser/Ncsa.pm new file mode 100644 index 0000000..79d7625 --- /dev/null +++ 
b/lib/Yaala/Parser/Ncsa.pm @@ -0,0 +1,193 @@ +package Yaala::Parser; + +use strict; +use warnings; +use vars qw(%DATAFIELDS); + +use Exporter; +use Yaala::Parser::WebserverTools qw#%MONTH_NUMBERS detect_referer detect_browser + detect_os extract_data#; +use Yaala::Data::Persistent qw#init#; + +@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS); +@Yaala::Parser::ISA = ('Exporter'); + +our $LASTDATE = init ('$LASTDATE', 'scalar'); +our $EXTRA = init ('$EXTRA', 'hash'); + +if (!$$LASTDATE) { $$LASTDATE = 0; } +if (!defined ($EXTRA->{'total'})) { $EXTRA->{'total'} = 0; } +if (!defined ($EXTRA->{'days'} )) { $EXTRA->{'days'} = {}; } +if (!defined ($EXTRA->{'search_terms'} )) { $EXTRA->{'search_terms'} = {}; } + +%DATAFIELDS = ( + host => 'key:host', + user => 'key', + date => 'key:date', + hour => 'key:hour', + tld => 'key', + file => 'key', + status => 'key:numeric', + browser => 'key', + os => 'key', + referer => 'key:url', + + bytes => 'agg:bytes', + requests => 'agg' +); + +# This needs to be done at runtime, since Data uses Setup which relies on +# %datafields to be defined -octo +require Yaala::Data::Core; +import Yaala::Data::Core qw#store#; + +my $VERSION = '$Id: Ncsa.pm,v 1.10 2003/12/07 15:40:35 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +sub parse +{ + my $line = shift or return undef; + + #if ($line =~ m#^(\S+)\s(\S+)\s(\S+)\s\[([^\]]+)\]\s"([^"]+)"\s(\d+)\s(\S+)\s"([^"]+)"\s"([^"]+)"(?:\s"([^"]+)")?$#) + if ($line =~ m#^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\S+) "([^"]+)" "([^"]+)"(?: "([^"]+)")?$#) + { +# Initialize the variables that we can get out of +# each line first.. + my ($host, $ident, $user, $date, $request, $status, + $bytes, $referer, $browser, $cookie) = + ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10); + +# And now initialize all the variables we will use +# to get more information out of each field.. 
+ my ($day, $month, $year, $hour, $minute, $second) = + $date =~ m#(\d\d)/(\w{3})/(\d{4}):(\d\d):(\d\d):(\d\d)#; + + $month = $MONTH_NUMBERS{$month}; + $date = sprintf("%04u-%02u-%02u", $year, $month, $day); + + { + my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u", + $year, $month, $day, $hour, $minute, $second)); + + if ($tmp < $$LASTDATE) + { + print STDERR $/, __FILE__, ": Skipping.. ($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200); + next; + } + else { $$LASTDATE = $tmp; } + } + + my ($method, $file, $params); + if ($request =~ m#(\S+) ([^ \?]+)\??(\S*)#) + { + $method = $1; + $file = $2; + $params = (defined ($3) ? $3 : ''); + } + else + { + print STDERR $/, __FILE__, ": Malformed request: ``$request''." if ($::DEBUG); + return (0); + } + + if (($user ne '-') and ($status >= 400) and ($status < 500)) + { + $user = '*INVALID*'; + } + + if ($user eq '-') { $user = '*UNKNOWN*'; } + if ($bytes eq '-') { $bytes = 0; } + + my $tld; + if ($host =~ m/\.([a-z]{2,})$/i) + { + $tld = lc ($1); + } + else + { + $tld = '*UNRESOLVED*'; + } + + my $os = detect_os ($browser); + my $browser_name = detect_browser ($browser); + my @search_terms = extract_data ($referer); + if ($referer eq '-') { $referer = ''; } + + $EXTRA->{'total'}++; + $EXTRA->{'days'}{$date}++; + + if (scalar @search_terms) + { + print $/, __FILE__, ": Search Terms: ", + join (' ', @search_terms) + if ($::DEBUG & 0x1000); + + $EXTRA->{'search_terms'}{$_}++ for (@search_terms); + } + + my %combined = ( + 'host' => $host, + 'user' => $user, + 'date' => $date, + 'hour' => $hour, + 'browser' => $browser_name, + 'os' => $os, + 'tld' => $tld, + 'file' => $file, + 'referer' => $referer, + 'status' => $status, + 'bytes' => $bytes, + 'requests' => 1 + ); + store (\%combined); + } + elsif ($::DEBUG) + { + chomp ($line); + print STDERR $/, __FILE__, ": Unable to parse: '$line'"; + } +} + +sub extra +{ + my ($average, $days) = (0, 0); + + $days = scalar (keys (%{$EXTRA->{'days'}})); + return (0) unless ($days); + + $average 
= sprintf ("%.1f", ($EXTRA->{'total'} / $days)); + + $::EXTRA->{'Total requests'} = $EXTRA->{'total'}; + $::EXTRA->{'Average requests per day'} = $average; + $::EXTRA->{'Reporting period'} = "$days days"; + + my @sorted_terms = sort + { $EXTRA->{'search_terms'}{$b} <=> $EXTRA->{'search_terms'}{$a} } + (keys %{$EXTRA->{'search_terms'}}); + + if (@sorted_terms) + { + my $max = $EXTRA->{'search_terms'}{$sorted_terms[0]}; + my @scalar_terms = (); + + while (@sorted_terms and + ($EXTRA->{'search_terms'}{$sorted_terms[0]} / $max) > 0.1) + { + $_ = shift (@sorted_terms); + + push (@scalar_terms, + sprintf ("%s (%u)", + $_, $EXTRA->{'search_terms'}{$_}) + ); + } + $::EXTRA->{'Search terms used'} = join ("
\n ", @scalar_terms); + + if (@sorted_terms) + { + my $skipped = scalar (@sorted_terms); + $::EXTRA->{'Search terms used'} .= "
\n $skipped more skipped"; + } + } +} diff --git a/lib/Yaala/Parser/Netacct.pm b/lib/Yaala/Parser/Netacct.pm new file mode 100644 index 0000000..17cf587 --- /dev/null +++ b/lib/Yaala/Parser/Netacct.pm @@ -0,0 +1,114 @@ +package Yaala::Parser; +# FIXME + +use strict; +use warnings; +use vars qw(%names %datafields); + +use Exporter; +use Config qw#get_config read_config#; + +die; + +@Yaala::Parser::EXPORT_OK = qw#parse extra %datafields#; + +@Yaala::Parser::ISA = ('Exporter'); + +print STDERR "\nparser/netacct: Using NET-ACCT format" if $::DEBUG; +# FIXME: pass month, date and hour in seconds to properly format and sort. + +read_config ('netacct.config'); +for (get_config ('alias')) +{ + s/\s//g; + my ($name, $ips) = split (m/:/, $_); + my @ips = split (m/,/, $ips); + + for (grep { m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ } @ips) + { + $names{$_} = $name; + } +} + +%datafields= ( + protocol => 'key', + source => 'key:host', + sourceport => 'key', + destination => 'key:host', + destinationport => 'key', + interface => 'key', + user => 'key', + month => 'key', + date => 'key', + hour => 'key', + packetcount => 'amount:number', + bytes => 'amount:bytes', + connections => 'amount:number' + ); + +# This needs to be done at runtime, since Data uses Setup which relies on +# %datafields to be defined -octo +require Yaala::Data; +import Yaala::Data qw#store#; + +return (1); + +sub parse +{ + my $line = shift or return undef; + + my @data = split (/[\t\s]+/, $line, 10); + +# Initialize the variables that we can get out of +# each line first.. 
+ my ($epoch, $protocol, $source_ip, $source_port, $dest_ip, + $dest_port, $packet_count, $data_size, $interface, + $user) = @data; + + my ($hour, $day, $month, $year) = (localtime ($epoch))[2,3,4,5]; + ++$month; $year += 1900; + my $date = sprintf ("%04u-%02u-%02u", $year, $month, $day); + $hour = sprintf ("%02u", $hour); + $month = sprintf ("%02u", $month); + +# And now initialize all the variables we will use +# to get more information out of each field.. + + if ($protocol == 1) { $protocol = 'ICMP'; } + elsif ($protocol == 6) { $protocol = 'TCP'; } + elsif ($protocol == 17) { $protocol = 'UDP'; } + + if (defined $names{$source_ip}) { $source_ip = $names{$source_ip}; } + elsif ($source_ip eq '127.0.0.1') { $source_ip = 'localhost'; } + elsif ($source_ip =~ /^192\.168\./) { $source_ip = 'lan'; } + else { $source_ip = 'extern'; } + + if (defined $names{$dest_ip}) { $dest_ip = $names{$dest_ip}; } + elsif ($dest_ip eq '127.0.0.1') { $dest_ip = 'localhost'; } + elsif ($dest_ip =~ /^192\.168\./) { $dest_ip = 'lan'; } + else { $dest_ip = 'extern'; } + + my %combined = ( + 'protocol' => $protocol, + 'source' => $source_ip, + 'sourceport' => $source_port, + 'destination' => $dest_ip, + 'destinationport'=> $dest_port, + 'packetcount' => $packet_count, + 'interface' => $interface, + 'user' => $user, + 'bytes' => $data_size, + 'hour' => $hour, + 'date' => $date, + 'month' => $month, + 'connections' => 1 + ); + store (\%combined); +} + +sub extra +{ + # foo +} + +1; diff --git a/lib/Yaala/Parser/Postfix.pm b/lib/Yaala/Parser/Postfix.pm new file mode 100644 index 0000000..12fe222 --- /dev/null +++ b/lib/Yaala/Parser/Postfix.pm @@ -0,0 +1,226 @@ +package Yaala::Parser; + +use strict; +use warnings; +use vars qw#%DATAFIELDS#; + +use Exporter; +use Yaala::Data::Persistent qw#init#; +use Yaala::Parser::WebserverTools qw#%MONTH_NUMBERS#; + +@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS); +@Yaala::Parser::ISA = ('Exporter'); + +our $LASTDATE = init ('$LASTDATE', 'scalar'); 
+our $EXTRA = init ('$EXTRA', 'hash');
+our $MAILS = init ('$MAILS', 'hash');
+
+if (!$$LASTDATE) { $$LASTDATE = 0; }
+if (!defined ($EXTRA->{'relay_denied'})) { $EXTRA->{'relay_denied'} = 0; }
+if (!defined ($EXTRA->{'tls_hosts'} )) { $EXTRA->{'tls_hosts'} = {}; }
+
+%DATAFIELDS =
+(
+	date => 'key:date',
+	hour => 'key:hour',
+
+	sender => 'key',
+	recipient => 'key',
+
+	defer_count => 'key:numeric',
+	delay => 'key:time',
+
+	incoming_host => 'key:host',
+	outgoing_host => 'key:host',
+
+	count => 'agg',
+	bytes => 'agg:bytes'
+);
+
+# This needs to be done at runtime, since Data uses Setup which relies on
+# %DATAFIELDS to be defined -octo
+require Yaala::Data::Core;
+import Yaala::Data::Core qw#store#;
+
+my $VERSION = '$Id: Postfix.pm,v 1.6 2003/12/07 15:42:22 octo Exp $';
+print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+return (1);
+
+# parse ($line)
+# Digests one syslog line written by postfix. Lines that carry a queue id
+# update the per-mail record kept in $MAILS->{$id}; the record is passed to
+# store () (via store_mail) when a recipient line reports status=sent. Lines
+# without a queue id are only checked for "Relay access denied" and for TLS
+# peers, both of which are counted in $EXTRA for extra ().
+# Returns undef for a line that is older than the newest line seen so far.
+sub parse
+{
+	my $line = shift;
+
+	if ($line =~ m#^(\w{3})\s+(\d+) (\d\d):(\d\d):(\d\d) (\S+) postfix/([^\[]+)[^:]+: ([A-F0-9]+): (.+)$#)
+	{
+		my ($month, $day, $hour, $minute, $second,
+			$hostname, $service, $id, $line_end) =
+			($1, $2, $3, $4, $5, $6, $7, $8, $9);
+		# The matched timestamp has no year field, so the current year is used.
+		my $year = (localtime ())[5] + 1900;
+		$month = $MONTH_NUMBERS{$month};
+
+		{
+			# YYYYMMDDhhmmss as one integer, comparable with $$LASTDATE
+			my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u",
+					$year, $month, $day, $hour, $minute, $second));
+
+			if ($tmp < $$LASTDATE)
+			{
+				# The message now shows the comparison that is really made.
+				print STDERR $/, __FILE__, ": Skipping.. ($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200);
+				return (undef);
+			}
+			else { $$LASTDATE = $tmp; }
+		}
+
+		my $date = sprintf ("%04u-%02u-%02u", $year, $month, $day);
+
+		# First line seen for this queue id: start with placeholders.
+		if (!defined ($MAILS->{$id}))
+		{
+			$MAILS->{$id} =
+			{
+				date => $date,
+				hour => $hour,
+				sender => '*UNKNOWN*',
+				recipient => '*UNKNOWN*',
+				defer_count => 0,
+				delay => 0,
+				incoming_host => '*UNKNOWN*',
+				outgoing_host => '*UNKNOWN*',
+				count => 1,
+				bytes => 0
+			};
+		}
+
+		$MAILS->{$id}{'date'} = $date;
+		$MAILS->{$id}{'hour'} = $hour;
+
+		if ($line_end =~ m/^to=<([^>]+)>, relay=([^,]+), delay=(\d+), status=(\w+)/)
+		{
+			my ($to, $relay, $delay, $status) = ($1, $2, $3, $4);
+
+			$MAILS->{$id}{'recipient'} = $to;
+			# Keep the largest delay reported for this mail.
+			if ($MAILS->{$id}{'delay'} < $delay)
+			{
+				$MAILS->{$id}{'delay'} = $delay;
+			}
+
+			if ($relay =~ m/^([^\[]+)\[([\d\.]+)\]$/)
+			{
+				my $host = $1;
+				my $ip = $2;
+
+				if ($host eq 'unknown')
+				{
+					$MAILS->{$id}{'outgoing_host'} = $ip;
+				}
+				else
+				{
+					$MAILS->{$id}{'outgoing_host'} = $host;
+				}
+			}
+			elsif ($relay eq 'local')
+			{
+				$MAILS->{$id}{'outgoing_host'} = 'localhost';
+			}
+
+			if ($status eq 'sent')
+			{
+				store_mail ($id);
+			}
+			elsif ($status eq 'deferred')
+			{
+				$MAILS->{$id}{'defer_count'}++;
+			}
+			elsif ($status eq 'bounced')
+			{
+				# A bounced recipient is not stored; drop the record once
+				# no recipient is left.
+				$MAILS->{$id}{'recipient_count'}--;
+				if ($MAILS->{$id}{'recipient_count'} < 1)
+				{
+					delete ($MAILS->{$id});
+				}
+			}
+			elsif ($::DEBUG)
+			{
+				print STDERR $/, __FILE__, ": Unknown status: $status";
+			}
+		}
+		elsif ($line_end =~ m/^from=<([^>]*)>, size=(\d+), nrcpt=(\d+)/)
+		{
+			# The pattern has exactly three groups; the former fourth value
+			# ($4) did not belong to this match.
+			my ($from, $size, $nrcpt) = ($1, $2, $3);
+
+			$MAILS->{$id}{'sender'} = $from if ($from);
+			$MAILS->{$id}{'bytes'} = $size;
+
+			$MAILS->{$id}{'recipient_count'} = $nrcpt;
+		}
+		elsif ($line_end =~ m/client=([^ ,]+)/)
+		{
+			my $client = $1;
+
+			if ($client =~ m/^([^\[]+)\[([\d\.]+)\]$/)
+			{
+				my $host = $1;
+				my $ip = $2;
+
+				if ($host eq 'unknown')
+				{
+					$MAILS->{$id}{'incoming_host'} = $ip;
+				}
+				else
+				{
+					$MAILS->{$id}{'incoming_host'} = $host;
+				}
+			}
+			elsif ($::DEBUG)
+			{
+				print STDERR $/, __FILE__,
+					": Unable to parse client string: $client";
+			}
+		}
+	}
+	elsif ($line =~ m/Relay access denied/i)
+	{
+		$EXTRA->{'relay_denied'}++;
+	}
+	elsif ($line =~ m/TLS connection established (?:to|from) ([^\[]+)\[([^\]]+)\]/i)
+	{
+		my $host = $1;
+		my $ip = $2;
+
+		my $ident = ($host eq 'unknown' ? $ip : $host);
+
+		$EXTRA->{'tls_hosts'}{$ident} = 1;
+	}
+	elsif ($::DEBUG and 0)
+	{
+		# Disabled on purpose by the constant 0 in the condition.
+		chomp ($line);
+		print STDERR $/, __FILE__, ": Unable to parse line: $line";
+	}
+}
+
+# store_mail ($id)
+# Passes the record of mail $id to store () and counts one recipient as
+# done; the record is removed from $MAILS when no recipient is left.
+sub store_mail
+{
+	my $id = shift;
+	my $mail = $MAILS->{$id};
+
+	store ($mail);
+
+	$mail->{'recipient_count'}--;
+	if ($mail->{'recipient_count'} < 1)
+	{
+		delete ($MAILS->{$id});
+	}
+}
+
+# extra ()
+# Copies the counters collected by parse () into the global $::EXTRA hash.
+# Entries are only written for counters that are not zero.
+sub extra
+{
+	if ($EXTRA->{'relay_denied'})
+	{
+		$::EXTRA->{'Relay access denied'} = sprintf ("%u times", $EXTRA->{'relay_denied'});
+	}
+
+	my $tls_hosts = scalar (keys (%{$EXTRA->{'tls_hosts'}}));
+	if ($tls_hosts)
+	{
+		$::EXTRA->{'TLS connections'} = sprintf ("%u hosts", $tls_hosts);
+	}
+}
diff --git a/lib/Yaala/Parser/Squid.pm b/lib/Yaala/Parser/Squid.pm
new file mode 100644
index 0000000..be429be
--- /dev/null
+++ b/lib/Yaala/Parser/Squid.pm
@@ -0,0 +1,139 @@
+package Yaala::Parser;
+
+use strict;
+use warnings;
+use vars qw(%DATAFIELDS);
+
+use Exporter;
+use Yaala::Data::Persistent qw#init#;
+use Yaala::Config qw#get_config#;
+
+@Yaala::Parser::EXPORT_OK = qw#parse extra %DATAFIELDS#;
+@Yaala::Parser::ISA = ('Exporter');
+
+our $LASTDATE = init ('$LASTDATE', 'scalar');
+our $EXTRA = init ('$EXTRA', 'hash');
+
+if (!$$LASTDATE) { $$LASTDATE = 0; }
+if (!defined ($EXTRA->{'total'})) { $EXTRA->{'total'} = 0; }
+if (!defined ($EXTRA->{'start'})) { $EXTRA->{'start'} = 0; }
+if (!defined ($EXTRA->{'end'} )) { $EXTRA->{'end'} = 0; }
+if (!defined ($EXTRA->{'days'} )) { $EXTRA->{'days'} = {}; }
+
+%DATAFIELDS = (
+	'date' => 'key:date',
+	'hour' => 'key:hour',
+	'client' => 'key:host',
+	'server' => 'key:host',
+	'peer' => 'key:host',
+	'protocol' => 'key',
+
'method' => 'key',
+	'mime' => 'key',
+	'httpstatus' => 'key:numeric',
+	'resultcode' => 'key',
+	'hierarchycode' => 'key',
+	'ident' => 'key',
+	'bytes' => 'agg:bytes',
+	'elapsed' => 'agg:time',
+	'requests' => 'agg'
+);
+
+# This needs to be done at runtime, since Data uses Setup which relies on
+# %datafields to be defined -octo
+require Yaala::Data::Core;
+import Yaala::Data::Core qw#store#;
+
+my $VERSION = '$Id: Squid.pm,v 1.11 2003/12/07 16:46:50 octo Exp $';
+print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+return (1);
+
+# parse ($line)
+# Splits one line of a squid access log into its fields, updates the
+# counters in $EXTRA and passes the resulting record to store ().
+# Returns undef for an empty line, for a line older than the newest one
+# seen so far and for requests whose URL is '*'; returns 0 when the URL
+# cannot be split into protocol, server and path.
+sub parse
+{
+	my $line = shift or return undef;
+	#if ($line =~ m#^(\S+)\s+(\d+)\s(\S+)\s([^/]+)/(\d+)\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s([^/]+)/(\S+)\s(.*)$#)
+	if ($line =~ m#^(\S+)\s+(\d+) (\S+) ([^/]+)/(\d+) (\d+) (\S+) (\S+) (\S+) ([^/]+)/(\S+) (.*)$#)
+	{
+		my ($epoch, $elapsed, $client, $result, $status,
+			$bytes, $method, $url, $ident, $hierarchy,
+			$peer, $mime) =
+			($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+
+		if ($epoch < $$LASTDATE)
+		{
+			print STDERR $/, __FILE__, ": Skipping.. ($epoch < $$LASTDATE)" if ($::DEBUG & 0x0200);
+			return (undef);
+		}
+		$$LASTDATE = $epoch;
+
+		return undef if ($url eq '*');
+
+		# One localtime () call delivers all the parts that are needed.
+		my ($lt_hour, $lt_day, $lt_month, $lt_year) = (localtime ($epoch))[2, 3, 4, 5];
+		my $hour = sprintf ("%02u", $lt_hour);
+		my $date = sprintf ("%04u-%02u-%02u", $lt_year + 1900, $lt_month + 1, $lt_day);
+
+		$EXTRA->{'total'}++;
+		$EXTRA->{'days'}{$date}++;
+		if (!$EXTRA->{'start'} or ($epoch < $EXTRA->{'start'}))
+		{
+			$EXTRA->{'start'} = $epoch;
+		}
+		if ($epoch > $EXTRA->{'end'})
+		{
+			$EXTRA->{'end'} = $epoch;
+		}
+
+		my ($protocol, $server, $path);
+		if ($url =~ m#^([^:]+)://([^:/]+)(?::\d+)?(/[^\?]*)#)
+		{
+			($protocol, $server, $path) = ($1, $2, $3);
+		}
+		elsif ($url =~ /^([\w\d\-\.]+):443$/)
+		{
+			# host:443 without a scheme is recorded as https
+			($protocol, $server, $path) = ('https', $1, '');
+		}
+		else
+		{
+			print STDERR $/, __FILE__, ": Unable to parse URL: ``$url''" if ($::DEBUG);
+			return (0);
+		}
+
+		$ident = '*UNKNOWN*' if ($ident eq '-');
+
+		my %record = (
+			'date' => $date,
+			'hour' => $hour,
+			'client' => $client,
+			'server' => $server,
+			'peer' => $peer,
+			'protocol' => uc ($protocol),
+			'method' => $method,
+			'mime' => $mime,
+			'httpstatus' => $status,
+			'resultcode' => uc ($result),
+			'hierarchycode' => uc ($hierarchy),
+			'ident' => uc ($ident),
+			'bytes' => $bytes,
+			'elapsed' => $elapsed,
+			'requests' => 1,
+		);
+		store (\%record);
+	}
+	elsif ($::DEBUG)
+	{
+		chomp ($line);
+		print STDERR $/, __FILE__, ": Unable to parse: ``$line''";
+	}
+}
+
+# extra ()
+# Writes the totals collected by parse () into the global $::EXTRA hash:
+# request count, number of distinct days, average per day and the first
+# and last timestamp seen.
+sub extra
+{
+	my $days = scalar (keys (%{$EXTRA->{'days'}}));
+	my $average = ($days ? sprintf ("%.1f", ($EXTRA->{'total'} / $days)) : 0);
+	my $start = scalar (localtime ($EXTRA->{'start'}));
+	my $end = scalar (localtime ($EXTRA->{'end'}));
+
+	$::EXTRA->{'Total requests'} = $EXTRA->{'total'};
+	$::EXTRA->{'Reporting period'} = "$days Days";
+	$::EXTRA->{'Average requests per day'} = $average;
+	$::EXTRA->{'Start date'} = $start;
+	$::EXTRA->{'End date'} = $end;
+}
diff --git a/lib/Yaala/Parser/WebserverTools.pm b/lib/Yaala/Parser/WebserverTools.pm
new file mode 100644
index 0000000..206289e
--- /dev/null
+++ b/lib/Yaala/Parser/WebserverTools.pm
@@ -0,0 +1,292 @@
+package Yaala::Parser::WebserverTools;
+
+use strict;
+use warnings;
+use vars qw(%fields %MONTH_NUMBERS);
+
+use Exporter;
+use Yaala::Config qw#get_config read_config#;
+
+@Yaala::Parser::WebserverTools::EXPORT_OK = qw(%MONTH_NUMBERS detect_referer
+	detect_browser detect_os extract_data);
+@Yaala::Parser::WebserverTools::ISA = ('Exporter');
+
+read_config ('webserver.config');
+
+our $referer_format = get_config ('referer_format');
+our $localhost_name = '';
+our @local_aliases = get_config ('localhost');
+
+our %recognized_browsers;
+our %recognized_oses;
+
+# Used to translate the month's name into its number
+%MONTH_NUMBERS = ( 'Jan' => 1,
+	'Feb' => 2,
+	'Mar' => 3,
+	'Apr' => 4,
+	'May' => 5,
+	'Jun' => 6,
+	'Jul' => 7,
+	'Aug' => 8,
+	'Sep' => 9,
+	'Oct' => 10,
+	'Nov' => 11,
+	'Dec' => 12 );
+
+our %fields =
+(# the CGI fields that different search engines use to store the search strings in
+	'MT' => 'lycos', # hotbot.lycos.com
+	'ask' => 'ask.com', # ask.com/main/metaAnswer.asp
+	'origq' => 'msn', # search.msn.com/results.asp
+	'p' => 'yahoo', # google.yahoo.com/bin/query
+	'q' => 'google|freshmeat', # google.com/search, freshmeat.net/search, google.de/search
+	'qs' => 'virgilio', # search.virgilio.it/search/cgi/search.cgi
+	'query' => 'lycos', # search-arianna.iol.it/abin/internationalsearch, search.lycos.com/main/default.asp, suche.lycos.de/cgi-bin/pursuit
+	'search' => 'altavista|excite' # altavista.com/iepane, search.excite.ca/search.gw
+);
+
+{
+	my $include_local = get_config ('referer_include_localhost');
+
+	if ($include_local =~ m/true/i)
+	{
+		$localhost_name = 'localhost';
+	}
+}
+
+my $VERSION = '$Id: WebserverTools.pm,v 1.5 2003/12/07 16:47:14 octo Exp $';
+print STDERR
$/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+return (1);
+
+sub detect_referer
+# Shortens a referer URL to the form chosen by $referer_format: 'full'
+# gives host, path and query string, 'url' gives host and path, anything
+# else gives the host only. Returns '' when the argument does not look
+# like scheme://host/path and '*NONE*' when the host ends up empty (a
+# local alias while $localhost_name is empty).
+{
+	my $referer = shift;
+
+	my ($host, $uri, $params) =
+		$referer =~ m#^\w+://([^:/]+)(?::\d+)?(/[^\?]*)(\??.*)#;
+	#$referer =~ m#^\w+://([^:/]+)(?::\d+)?#;
+
+	return ('') if (!defined ($host));
+
+	# Every configured alias is applied to the host as a pattern.
+	$host = $localhost_name if (grep { $host =~ m/$_/i } @local_aliases);
+
+	return ('*NONE*') if (!$host);
+
+	return ($host . $uri . $params) if ($referer_format eq 'full');
+	return ($host . $uri) if ($referer_format eq 'url');
+	return ($host);
+}
+
+sub detect_browser
+# Maps a user agent string to a short browser name. The rules are tried in
+# order and the first match wins; strings containing "Mozilla" that match
+# no earlier rule are refined by a second list. Results are remembered in
+# %recognized_browsers so every distinct string is examined only once.
+{
+	my $agent = shift;
+
+	return ($recognized_browsers{$agent})
+		if (defined $recognized_browsers{$agent});
+
+	my @plain_rules = (
+		[qr/Lynx/i, 'Lynx'],
+		[qr/Links/i, 'Links'],
+		[qr/Opera/i, 'Opera'],
+		[qr/WebTV/i, 'WebTV'],
+		[qr/curl/i, 'curl'],
+		[qr/wget/i, 'wget'],
+		[qr/GetRight|GoZilla/i, 'Download Manager'],
+		[qr/bot|Google|Slurp|Scooter|Spider|Infoseek|Crawl|Mercator|FireBall|av\.com|Teoma|Ask Jeeves/i, 'Search Engines'],
+	);
+	my @mozilla_rules = (
+		[qr/Galeon/i, 'Galeon'],
+		[qr/Phoenix/i, 'Phoenix'],
+		[qr/Chimera|Camino/i, 'Camino'],
+		[qr/Konqueror/i, 'Konqueror'],
+		[qr/Safari/i, 'Safari'],
+		[qr/MultiZilla/i, 'MultiZilla'],
+		[qr/MSIE/i, 'MSIE'],
+		[qr/compatible/i, 'Netscape compatible'],
+		[qr!Mozilla/[0-4]|Netscape!i, 'Netscape Navigator'],
+	);
+
+	my $label;
+	for my $rule (@plain_rules)
+	{
+		next if ($agent !~ $rule->[0]);
+		$label = $rule->[1];
+		last;
+	}
+
+	if (!defined ($label) and ($agent =~ /Mozilla/i))
+	{
+		# Plain 'Mozilla' unless one of the finer rules applies.
+		$label = 'Mozilla';
+		for my $rule (@mozilla_rules)
+		{
+			next if ($agent !~ $rule->[0]);
+			$label = $rule->[1];
+			last;
+		}
+	}
+
+	if (!defined ($label))
+	{
+		$label = 'unknown';
+		print $/, __FILE__, ": Unknown browser: '$agent'" if ($::DEBUG & 0x2000);
+	}
+
+	$recognized_browsers{$agent} = $label;
+	return ($label);
+}
+
+sub detect_os
+# Maps the same user agent string to the name of an operating system.
+# The rules are tried in order and the first match wins; the BSD and
+# Windows families are narrowed down by a second look at the string.
+# Results are remembered in %recognized_oses.
+{
+	my $agent = shift;
+
+	return ($recognized_oses{$agent})
+		if (defined $recognized_oses{$agent});
+
+	my $bsd_flavour = sub
+	{
+		my $ua = shift;
+		return ('OpenBSD') if ($ua =~ /open/i);
+		return ('FreeBSD') if ($ua =~ /free/i);
+		return ('NetBSD') if ($ua =~ /net/i);
+		return ('some BSD');
+	};
+	my $windows_flavour = sub
+	{
+		my $ua = shift;
+		return ('Windows 95') if ($ua =~ /95/);
+		return ('Windows 98') if ($ua =~ /98/);
+		return ('Windows ME') if ($ua =~ /Me/i);
+		if ($ua =~ /NT/i)
+		{
+			return ('Windows XP') if ($ua =~ /NT.5.1/i);
+			return ('Windows 2000') if ($ua =~ /NT.5.0/i);
+			return ('Windows NT');
+		}
+		return ('Windows 2000') if ($ua =~ /2000|2k/i);
+		return ('Windows XP') if ($ua =~ /xp/i);
+		return ('some Windows');
+	};
+
+	# The second column is either the final name or a code reference
+	# that picks the name within a family.
+	my @rules = (
+		[qr/IRIX/i, 'IRIX'],
+		[qr/AIX/i, 'AIX'],
+		[qr/Sun/i, 'SunOS'],
+		[qr/BeOS/i, 'BeOS'],
+		[qr/OS.?2/i, 'OS/2'],
+		[qr/Amiga/i, 'AmigaOS'],
+		[qr/Mac|PPC/i, 'MacOS'],
+		[qr/BSD/i, $bsd_flavour],
+		[qr/Linux|X11|KDE|Genome|Gnome/i, 'Linux'],
+		[qr/Win/i, $windows_flavour],
+		[qr/ix/i, 'some UNIX'],
+	);
+
+	my $label;
+	for my $rule (@rules)
+	{
+		next if ($agent !~ $rule->[0]);
+		$label = (ref ($rule->[1]) ? $rule->[1]->($agent) : $rule->[1]);
+		last;
+	}
+
+	if (!defined ($label))
+	{
+		$label = 'unknown';
+		print $/, __FILE__, ": Unknown OS: '$agent'" if ($::DEBUG & 0x2000);
+	}
+
+	$recognized_oses{$agent} = $label;
+	return ($label);
+}
+
+sub extract_data
+# Pulls the search terms out of a referer URL. The query string is split
+# into key=value pairs; for every key listed in %fields whose pattern
+# matches the part of the referer in front of the '?', the value is
+# lowercased and split into words. Words longer than two characters are
+# returned, each one only once. An empty list is returned when the
+# referer has no query string.
+{
+	my $referer = shift;
+
+	# $server is everything in front of the first '?', $query the rest.
+	my ($server, $query) = split (/\?/, $referer, 2);
+
+	# An empty list, not 0: a returned 0 would be treated as a search
+	# term by the caller.
+	return () unless $query;
+
+	my %form = ();
+	for my $pair (split (/\&/, $query))
+	{
+		my ($key, $val) = split (/=/, $pair, 2);
+		next unless defined $val;
+
+		# A "+" in the request string stands for a blank.
+		$val =~ s/\+/ /g;
+
+		# %XX escapes are dropped instead of being decoded. To decode
+		# them use this line instead of the one below it:
+		# $val =~ s/\%(.{2})/pack("C", hex($1))/eg;
+		$val =~ s/\%(.{2})//g;
+		$form{$key} = $val;
+	}
+
+	# With debug flag 0x1000 every extracted pair is printed to STDOUT,
+	# which helps to find the fields other search engines use.
+	if ($::DEBUG & 0x1000)
+	{
+		print $/, __FILE__, "Extracted data: $_ = ", $form{$_} for (keys %form);
+	}
+
+	# Collected in a hash so that a repeated word counts only once.
+	my %words = ();
+	for my $field (keys %fields)
+	{
+		next unless defined $form{$field};
+
+		# The field only counts when the referring server matches the
+		# pattern stored for it.
+		my $pattern = $fields{$field};
+		next unless $server =~ /$pattern/i;
+
+		my $terms = lc ($form{$field});
+
+		# "cache:" queries come from google's cache option.
+		next if $terms =~ /^cache:/;
+
+		for my $word (split (/\s+/, $terms))
+		{
+			$words{$word} = 1 if (length ($word) > 2);
+		}
+	}
+
+	return keys %words;
+}
diff --git a/lib/Yaala/Parser/Wnserver.pm b/lib/Yaala/Parser/Wnserver.pm
new file mode 100644
index 0000000..2e42bf0
--- /dev/null
+++ b/lib/Yaala/Parser/Wnserver.pm
@@ -0,0 +1,196 @@
+package Yaala::Parser;
+
+# ncsa.pm was patched to support wn-server by M.
Feenstra on 20/09/2001
+
+use strict;
+use warnings;
+use vars qw(%DATAFIELDS);
+
+use Exporter;
+use Yaala::Parser::WebserverTools qw#%MONTH_NUMBERS detect_referer detect_browser
+	detect_os extract_data#;
+use Yaala::Data::Persistent qw#init#;
+
+@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS);
+@Yaala::Parser::ISA = ('Exporter');
+
+our $LASTDATE = init ('$LASTDATE', 'scalar');
+our $EXTRA = init ('$EXTRA', 'hash');
+
+if (!$$LASTDATE) { $$LASTDATE = 0; }
+if (!defined ($EXTRA->{'total'})) { $EXTRA->{'total'} = 0; }
+if (!defined ($EXTRA->{'days'} )) { $EXTRA->{'days'} = {}; }
+if (!defined ($EXTRA->{'search_terms'} )) { $EXTRA->{'search_terms'} = {}; }
+
+%DATAFIELDS = (
+	host => 'key:host',
+	user => 'key',
+	date => 'key:date',
+	hour => 'key:hour',
+	tld => 'key',
+	file => 'key',
+	status => 'key:numeric',
+	browser => 'key',
+	os => 'key',
+	referer => 'key:url',
+	virtualhost => 'key',
+
+	bytes => 'agg:bytes',
+	requests => 'agg'
+);
+
+# This needs to be done at runtime, since Data uses Setup which relies on
+# %DATAFIELDS to be defined -octo
+require Yaala::Data::Core;
+import Yaala::Data::Core qw#store#;
+
+my $VERSION = '$Id: Wnserver.pm,v 1.9 2003/12/07 16:48:59 octo Exp $';
+print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+return (1);
+
+# parse ($line)
+# Splits one line of a wn-server log into its fields, derives tld, browser,
+# operating system and search terms from them, updates the counters in
+# $EXTRA and passes the record to store ().
+# Returns undef for an empty line and for a line older than the newest one
+# seen so far; returns 0 when the request field cannot be split.
+sub parse
+{
+	my $line = shift or return undef;
+
+	if ($line =~ /^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]+)" (\d+) (\d+) <[^>]*><([^>]*)> <([^>]*)> <([^>]*)> <([^>]*)>$/)
+	{
+# Initialize the variables that we can get out of
+# each line first..
+# The pattern has eleven groups; the last one ($11) is the virtual host and
+# was missing from this list, which left $virtual undefined.
+		my ($host, $ident, $user, $date, $request, $status,
+			$bytes, $browser, $referer, $cookie, $virtual) =
+			($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);
+
+# And now initialize all the variables we will use
+# to get more information out of each field..
+		my ($day, $month, $year, $hour, $minute, $second) =
+			$date =~ m#(\d\d)/(\w{3})/(\d{4}):(\d\d):(\d\d):(\d\d)#;
+
+		$month = $MONTH_NUMBERS{$month};
+		$date = sprintf("%04u-%02u-%02u", $year, $month, $day);
+
+		{
+			# YYYYMMDDhhmmss as one integer, comparable with $$LASTDATE
+			my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u",
+					$year, $month, $day, $hour, $minute, $second));
+
+			if ($tmp < $$LASTDATE)
+			{
+				print STDERR $/, __FILE__, ": Skipping.. ($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200);
+				# There is no loop inside this sub, so the line is
+				# skipped by returning instead of using "next".
+				return (undef);
+			}
+			else { $$LASTDATE = $tmp; }
+		}
+
+		my ($method, $file, $params);
+		if ($request =~ m#(\S+) ([^ \?]+)\??(\S*)#)
+		{
+			$method = $1;
+			$file = $2;
+			$params = (defined ($3) ? $3 : '');
+		}
+		else
+		{
+			print STDERR $/, __FILE__, ": Malformed request: ``$request''." if ($::DEBUG);
+			return (0);
+		}
+
+		# A user name given together with a 4xx status is not trusted.
+		if (($user ne '-') and ($status >= 400) and ($status < 500))
+		{
+			$user = '*INVALID*';
+		}
+
+		if ($user eq '-') { $user = '*UNKNOWN*'; }
+		if ($bytes eq '-') { $bytes = 0; }
+
+		my $tld;
+		if ($host =~ m/\.([a-z]{2,})$/i)
+		{
+			$tld = lc ($1);
+		}
+		else
+		{
+			$tld = '*UNRESOLVED*';
+		}
+
+		my $os = detect_os ($browser);
+		my $browser_name = detect_browser ($browser);
+		my @search_terms = extract_data ($referer);
+		if ($referer eq '-') { $referer = ''; }
+
+		$EXTRA->{'total'}++;
+		$EXTRA->{'days'}{$date}++;
+
+		if (scalar @search_terms)
+		{
+			print $/, __FILE__, ": Search Terms: ",
+				join (' ', @search_terms)
+				if ($::DEBUG & 0x1000);
+
+			$EXTRA->{'search_terms'}{$_}++ for (@search_terms);
+		}
+
+		my %combined = (
+			'host' => $host,
+			'user' => $user,
+			'date' => $date,
+			'hour' => $hour,
+			'browser' => $browser_name,
+			'os' => $os,
+			'tld' => $tld,
+			'file' => $file,
+			'referer' => $referer,
+			'status' => $status,
+			'bytes' => $bytes,
+			'virtualhost' => $virtual,
+			'requests' => 1
+		);
+		store (\%combined);
+	}
+	elsif ($::DEBUG)
+	{
+		chomp ($line);
+		print STDERR $/, __FILE__, ": Unable to parse: ``$line''";
+	}
+}
+
+sub extra
+{
+	my ($average, $days) = (0, 0);
+
+	$days = scalar (keys (%{$EXTRA->{'days'}}));
+	return
(0) unless ($days); + + $average = sprintf ("%.1f", ($EXTRA->{'total'} / $days)); + + $::EXTRA->{'Total requests'} = $EXTRA->{'total'}; + $::EXTRA->{'Average requests per day'} = $average; + $::EXTRA->{'Reporting period'} = "$days days"; + + my @sorted_terms = sort + { $EXTRA->{'search_terms'}{$b} <=> $EXTRA->{'search_terms'}{$a} } + (keys %{$EXTRA->{'search_terms'}}); + + if (@sorted_terms) + { + my $max = $EXTRA->{'search_terms'}{$sorted_terms[0]}; + my @scalar_terms = (); + + while (@sorted_terms and + ($EXTRA->{'search_terms'}{$sorted_terms[0]} / $max) > 0.1) + { + $_ = shift (@sorted_terms); + + push (@scalar_terms, + sprintf ("%s (%u)", + $_, $EXTRA->{'search_terms'}{$_}) + ); + } + $::EXTRA->{'Search terms used'} = join ("
\n ", @scalar_terms); + + if (@sorted_terms) + { + my $skipped = scalar (@sorted_terms); + $::EXTRA->{'Search terms used'} .= "
\n $skipped more skipped"; + } + } +} diff --git a/lib/Yaala/Parser/Xferlog.pm b/lib/Yaala/Parser/Xferlog.pm new file mode 100644 index 0000000..ec24f09 --- /dev/null +++ b/lib/Yaala/Parser/Xferlog.pm @@ -0,0 +1,191 @@ +package Yaala::Parser; + +use strict; +use warnings; +use vars qw(%DATAFIELDS); + +use Exporter; +use Yaala::Parser::WebserverTools qw(%MONTH_NUMBERS); +use Yaala::Data::Persistent qw#init#; +use Yaala::Config qw#get_config#; + +@Yaala::Parser::EXPORT_OK = qw(parse extra %DATAFIELDS); +@Yaala::Parser::ISA = ('Exporter'); + +our $LASTDATE = init ('$LASTDATE', 'scalar'); +our $EXTRA = init ('$EXTRA', 'hash'); + +if (!$$LASTDATE) { $$LASTDATE = 0; } +if (!defined ($EXTRA->{'total'})) { $EXTRA->{'total'} = 0; } +if (!defined ($EXTRA->{'days'} )) { $EXTRA->{'days'} = {}; } + +%DATAFIELDS = ( + host => 'key:host', + user => 'key', + access_mode => 'key', + + date => 'key:date', + hour => 'key:hour', + + file => 'key', + completion_status => 'key', + direction => 'key', + transfer_type => 'key', + transfer_time => 'key:numeric', + special_action => 'key', + + bytes => 'agg:bytes', + count => 'agg' +); + +# This needs to be done at runtime, since Data uses Setup which relies on +# %DATAFIELDS to be defined -octo +require Yaala::Data::Core; +import Yaala::Data::Core qw#store#; + +my $VERSION = '$Id: Xferlog.pm,v 1.4 2003/12/07 16:49:56 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +return (1); + +sub parse +{ + my $line = shift; + my @fields = split(m/\s+/, $line); + my ($hour, $minute, $second) = split (m/:/, $fields[3]); + + if (scalar (@fields) != 18) + { + print STDERR $/, __FILE__, ': There were ', + scalar (@fields), ' when 18 where expected..' + if ($::DEBUG); + } + + { + my $tmp = int (sprintf ("%04u%02u%02u%02u%02u%02u", + $fields[4], $MONTH_NUMBERS{$fields[1]}, $fields[2], + $hour, $minute, $second)); + + if ($tmp < $$LASTDATE) + { + print STDERR $/, __FILE__, ": Skipping.. 
($tmp < $$LASTDATE)" if ($::DEBUG & 0x0200);
+			return (undef);
+		}
+		else { $$LASTDATE = $tmp; }
+	}
+
+	my $date = sprintf ("%04u-%02u-%02u", $fields[4], $MONTH_NUMBERS{$fields[1]}, $fields[2]);
+
+	my $file = $fields[8];
+	my $bytes = $fields[7];
+
+	my $transfer_time = $fields[5];
+
+	my $host = $fields[6];
+	my $user = $fields[13];
+
+	# Anything other than 'a' is reported as binary, anything other than
+	# 'c' as incomplete.
+	my $transfer_type = ($fields[9] eq 'a' ? 'ascii' : 'binary');
+	my $completion_status = ($fields[17] eq 'c' ? 'complete' : 'incomplete');
+
+	# An unknown code in one of the next three fields makes parse ()
+	# return 0 without storing anything.
+	my $special_action;
+	if ($fields[10] eq '_')
+	{
+		$special_action = "none";
+	}
+	elsif ($fields[10] eq 'C')
+	{
+		$special_action = 'compressed';
+	}
+	elsif ($fields[10] eq 'U')
+	{
+		$special_action = 'uncompressed';
+	}
+	elsif ($fields[10] eq 'T')
+	{
+		$special_action = "tar'ed";
+	}
+	else
+	{
+		print STDERR $/, __FILE__, ': Unknown special_action: ',
+			$fields[10] if ($::DEBUG);
+		return (0);
+	}
+
+	my $direction;
+	if ($fields[11] eq 'i')
+	{
+		$direction = 'incoming';
+	}
+	elsif ($fields[11] eq 'o')
+	{
+		$direction = 'outgoing';
+	}
+	elsif ($fields[11] eq 'd')
+	{
+		$direction = 'deleted';
+	}
+	else
+	{
+		print STDERR $/, __FILE__, ': Unknown direction: ',
+			$fields[11] if ($::DEBUG);
+		return (0);
+	}
+
+	my $access_mode;
+	if ($fields[12] eq 'a')
+	{
+		$access_mode = 'anonymous';
+	}
+	elsif ($fields[12] eq 'g')
+	{
+		$access_mode = 'guest';
+	}
+	elsif ($fields[12] eq 'r')
+	{
+		$access_mode = 'real';
+	}
+	else
+	{
+		print STDERR $/, __FILE__, ': Unknown access-method: ',
+			$fields[12] if ($::DEBUG);
+		return (0);
+	}
+
+	$EXTRA->{'total'}++;
+	$EXTRA->{'days'}{$date}++;
+
+	# 14: service-name
+	# 15: authentication-method
+	# 16: authentication-user-id
+
+	my %data_set = (
+		host => $host,
+		user => $user,
+		access_mode => $access_mode,
+
+		date => $date,
+		hour => $hour,
+
+		file => $file,
+		completion_status => $completion_status,
+		direction => $direction,
+		transfer_type => $transfer_type,
+		transfer_time => $transfer_time,
+		special_action => $special_action,
+		bytes => $bytes,
+		count => 1
+	);
+	store (\%data_set);
+}
+
+# extra ()
+# Writes the totals collected by parse () into the global $::EXTRA hash:
+# number of transfers, number of distinct days and the average per day.
+sub extra
+{
+	$::EXTRA->{'Requests Total'} = $EXTRA->{'total'};
+
+	my $days = scalar (keys (%{$EXTRA->{'days'}}));
+
+	$::EXTRA->{'Reporting period'} = "$days days";
+
+	# When no line was parsed $days is 0; report an average of 0 in that
+	# case instead of dying with a division by zero.
+	$::EXTRA->{'Average requests per day'} =
+		($days ? sprintf ("%.1f", $EXTRA->{'total'} / $days) : 0);
+}
diff --git a/lib/Yaala/Report/Classic.pm b/lib/Yaala/Report/Classic.pm
new file mode 100644
index 0000000..b121df9
--- /dev/null
+++ b/lib/Yaala/Report/Classic.pm
@@ -0,0 +1,302 @@
+package Yaala::Report;
+
+use strict;
+use warnings;
+
+use Exporter;
+use Yaala::Html qw#head foot escape navbar get_filename get_title#;
+use Yaala::Data::Core qw#receive get_values#;
+use Yaala::Data::Setup qw#$SELECTS#;
+use Yaala::Data::Convert qw#convert#;
+use Yaala::Config qw#get_config#;
+use Yaala::Report::Core qw#$OUTPUTDIR#;
+use Yaala::Report::GDGraph qw#generate_graph $GRAPH_WIDTH $GRAPH_HEIGHT#;
+
+@Yaala::Report::EXPORT_OK = qw#generate#;
+@Yaala::Report::ISA = ('Exporter');
+
+my $VERSION = '$Id: Classic.pm,v 1.10 2003/12/07 14:53:30 octo Exp $';
+print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG);
+
+our $skip_empty = 1;
+if (get_config ('classic_skip_empty'))
+{
+	my $conf = lc (get_config ('classic_skip_empty'));
+	if ($conf eq 'no' or $conf eq 'false') { $skip_empty = 0; }
+}
+
+return (1);
+
+sub generate
+{
+	for (@$SELECTS)
+	{
+		my $sel = $_;
+		my @keys = @{$sel->[1]};
+
+		generate_sub_index ($sel);
+
+		for (@keys)
+		{
+			my $key = $_;
+			generate_sub_page ($sel, $key);
+		}
+	}
+
+	generate_index_page ();
+
+	return (1);
+}
+
+sub generate_sub_page
+{
+	my $sel = shift;
+	my $key = shift;
+
+	my @vals = get_values ($sel, $key);
+
+	my $filename = get_filename ($sel);
+	$filename =~ s/\.html$/__$key.html/;
+	my $title = get_title ($sel);
+
+	open (FH, '> ' . $OUTPUTDIR . $filename) or die ('open: ' . $!);
+
+	print FH head ($title, $title);
+	print FH '

', ucfirst ($key), "

\n"; + print FH qq#\n#; + print FH navbar ($sel); + print FH own_navbar ($sel, $key); + + my $graph_file = generate_graph ($sel, $key); + if ($graph_file) + { + print FH qq#

[graph]

\n#; + } + + for (sort (@vals)) + { + my $val = $_; + print FH generate_table ($sel, $key, $val); + } + + print FH foot (); + close (FH); +} + +sub generate_sub_index +{ + my $sel = shift; + + my $filename = get_filename ($sel); + my $title = get_title ($sel); + + open (FH, '> ' . $OUTPUTDIR . $filename) or die ('open: ' . $!); + + print FH head ($title, $title); + print FH navbar ($sel); + print FH own_navbar ($sel); + + print FH "\n"; + for (@{$sel->[1]}) + { + my $key = $_; + my @vals = get_values ($sel, $key); + my $num_vals = scalar (@vals); + + print FH " \n \n", + " \n \n"; + } + print FH "
", ucfirst ($key), "$num_vals entr", ($num_vals == 1 ? 'y' : 'ies'), + "
\n\n"; + + print FH foot (); + close (FH); +} + +sub generate_index_page +{ + open (FH, '> ' . $OUTPUTDIR . 'index.html') or die ('open: ' . $!); + + print FH head ("yaala $::VERSION", "yaala $::VERSION - Index"); + print FH navbar (); + + if (scalar (keys (%$::EXTRA))) + { + print FH "\n
\n\n"; + for (keys (%$::EXTRA)) + { + my $key = $_; + my $val = $::EXTRA->{$key}; + + print FH qq# \n \n \n \n#; + } + print FH "
$key$val
\n"; + } + else + { + print FH "\n\n"; + } + + print FH foot (); + close (FH); +} + +sub generate_table +{ + my $sel = shift; + my $key = shift; + my $val = shift; + + my @aggs = @{$sel->[0]}; + my $num_aggs = scalar (@aggs); + + my @keys = grep { $_ ne $key } (@{$sel->[1]}); + @keys = sort (@keys); + + + my $link_val = $val; + $link_val =~ s/\W//g; + + my $text = qq#\n
\n\n \n#; + $text .= ' \n \n"; + $text .= qq# \n \n# + . qq# \n#; + + my %grand_total = (); + for (@aggs) + { + $text .= qq# \n" + . qq# \n#; + + $grand_total{$_} = receive ($sel, $_, {$key => $val}); + } + $text .= " \n"; + + for (@keys) + { + my $second_key = $_; + my @second_vals = get_values ($sel, $second_key); + + my $tmp_text = ''; + my $first_line = 1; + my %sub_total = (); + + my $num_vals = scalar (@second_vals); + + for (sort (@second_vals)) + { + my $this_val = $_; + my $skipped_cells = 0; + my $tmp_text2 = ''; + + if (!$first_line) { $tmp_text2 = " \n"; } + $tmp_text2 .= qq# \n#; + + for (@aggs) + { + my $agg = $_; + my $sum = 0; + + if (!defined ($sub_total{$agg}) + or ($sub_total{$agg} != $grand_total{$agg})) + { + $sum = receive ($sel, $agg, {$key => $val, $second_key => $this_val}); + $sub_total{$agg} += $sum; + } + + if (!$sum and $skip_empty) + { + $skipped_cells++; + } + + my $print_sum = convert ($agg, $sum); + + my $percent = ($sum ? sprintf ("%.1f%%", 100 * $sum / $grand_total{$agg}) : ' '); + + $tmp_text2 .= " \n# + . qq# \n#; + } + + $tmp_text2 .= " \n"; + + if ($skipped_cells == $num_aggs) + { + $num_vals--; + } + else + { + $first_line = 0; + $tmp_text .= $tmp_text2; + } + } + + $text .= qq# \n \n" + . $tmp_text; + } + + $text .= qq# \n \n# + . qq# \n#; + for (@aggs) + { + my $print_sum = convert ($_, $grand_total{$_}); + + $text .= qq# \n# + . qq# \n#; + } + $text .= qq# \n\n# + . qq(

top ]

\n); + + return ($text); +} + +sub own_navbar +{ + my $sel = shift; + my $key = shift; + + if (!defined ($key)) { $key = ''; } + + my $base_filename = get_filename ($sel); + my $text = qq#\n"; + + if ($key) + { + my @vals = get_values ($sel, $key); + + $text .= qq#\n"; + } + + return ($text); +} diff --git a/lib/Yaala/Report/Combined.pm b/lib/Yaala/Report/Combined.pm new file mode 100644 index 0000000..633fcd6 --- /dev/null +++ b/lib/Yaala/Report/Combined.pm @@ -0,0 +1,431 @@ +package Yaala::Report; + +use strict; +use warnings; + +use Exporter; +use Yaala::Html qw#head foot escape navbar get_filename get_title#; +use Yaala::Data::Core qw#receive get_values#; +use Yaala::Data::Setup qw#$SELECTS#; +use Yaala::Data::Convert qw#convert#; +use Yaala::Config qw#get_config#; +use Yaala::Report::Core qw#$OUTPUTDIR#; +use Yaala::Report::GDGraph qw#generate_graph $GRAPH_WIDTH $GRAPH_HEIGHT#; + +@Yaala::Report::EXPORT_OK = qw#generate#; +@Yaala::Report::ISA = ('Exporter'); + +my $VERSION = '$Id: Combined.pm,v 1.10 2003/12/07 14:53:30 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +for (@$SELECTS) +{ + my $sel = $_; + while (scalar (@{$sel->[1]}) > 3) + { + my $ignore = pop (@{$sel->[1]}); + print STDERR $/, __FILE__, ": With the combined output only ", + "three fields are supported. ", + "Field ``$ignore'' will be ignored."; + } +} + +return (1); + +sub generate +{ + for (@$SELECTS) + { + my $sel = $_; + if (scalar (@{$sel->[1]}) == 1) + { + generate_1D_page ($sel); + } + elsif (scalar (@{$sel->[1]}) == 2) + { + generate_2D_page ($sel); + } + elsif (scalar (@{$sel->[1]}) == 3) + { + generate_3D_page ($sel); + } + else + { + die; + } + } + + generate_index_page (); + + return (1); +} + +sub generate_1D_page +{ + my $sel = shift; + my ($key) = @{$sel->[1]}; + + my $filename = get_filename ($sel); + my $title = get_title ($sel); + + open (FH, '> ' . $OUTPUTDIR . $filename) or die ('open: ' . 
$!); + + print FH head ($title, $title); + print FH navbar ($sel); + + print FH generate_1D_table ($sel, $key); + + print FH foot (); + close (FH); +} + +sub generate_2D_page +{ + my $sel = shift; + my ($key1, $key2) = @{$sel->[1]}; + + my $filename = get_filename ($sel); + my $title = get_title ($sel); + + open (FH, '> ' . $OUTPUTDIR . $filename) or die ('open: ' . $!); + + print FH head ($title, $title); + print FH navbar ($sel); + + print FH generate_2D_table ($sel, $key1, $key2); + + print FH foot (); + close (FH); +} + +sub generate_3D_page +{ + my $sel = shift; + my ($key1, $key2, $key3) = @{$sel->[1]}; + + my $filename = get_filename ($sel); + my $title = get_title ($sel); + + open (FH, '> ' . $OUTPUTDIR . $filename) or die ('open: ' . $!); + + print FH head ($title, $title); + print FH navbar ($sel); + + print FH generate_1D_table ($sel, $key3, 1); + + my @vals3 = get_values ($sel, $key3); + + for (sort (@vals3)) + { + my $val3 = $_; + print FH generate_2D_table ($sel, $key1, $key2, $key3, $val3); + } + + print FH foot (); + close (FH); +} + +sub generate_index_page +{ + open (FH, '> ' . $OUTPUTDIR . 'index.html') or die ('open: ' . $!); + + print FH head ("yaala $::VERSION", "yaala $::VERSION - Index"); + print FH navbar (); + + if (scalar (keys (%$::EXTRA))) + { + print FH "\n
\n\n"; + for (keys (%$::EXTRA)) + { + my $key = $_; + my $val = $::EXTRA->{$key}; + + print FH qq# \n \n \n \n#; + } + print FH "
$key$val
\n"; + } + else + { + print FH "\n\n"; + } + + print FH foot (); + close (FH); +} + +sub generate_1D_table +{ + my $sel = shift; + my $key = shift; + + my $do_links = 0; + if (@_) { $do_links = shift; } + + my @aggs = @{$sel->[0]}; + + my @vals = get_values ($sel, $key); + @vals = sort (@vals); + + my %grand_total = (); + for (@aggs) + { + $grand_total{$_} = receive ($sel, $_, {}); + } + + my $text = "\n
\n"; + + my $graph_file = generate_graph ($sel, $key); + if ($graph_file) + { + $text .= qq#

[graph]

\n#; + } + + $text .= "\n \n" + . ' \n \n"; + + if (scalar (@aggs) > 1) + { + $text .= qq# \n \n#; + $text .= qq# \n" for (@aggs); + $text .= " \n"; + } + + for (@vals) + { + my $val = $_; + + $text .= qq# \n \n"; + + for (@aggs) + { + my $agg = $_; + my $sum = receive ($sel, $agg, {$key => $val}); + my $print_sum = convert ($agg, $sum); + + $text .= qq#\n \n# + . " \n"; + } + } + + $text .= qq# \n \n# + . qq# \n# + } + $text .= qq#\n
' + . ucfirst ($key) . "
Aggregation# . ucfirst ($_) . "
#; + if ($do_links) + { + my $tmpval = $val; + $tmpval =~ s/\W//g; + $text .= qq(); + } + $text .= $val; + if ($do_links) + { + $text .= ''; + } + $text .= "$print_sum" . sprintf ("%.1f%%", 100 * $sum / $grand_total{$agg}) . "
Total\n#; + for (@aggs) + { + my $agg = $_; + my $print_sum = convert ($agg, $grand_total{$agg}); + + $text .= qq# $print_sum100.0%
\n#; + + return ($text); +} + +sub generate_2D_table +{ + my $sel = shift; + my $key1 = shift; + my $key2 = shift; + + my $text; + + my $key3 = ''; + my $val3 = ''; + if (scalar (@_) >= 2) + { + $key3 = shift; + $val3 = shift; + } + + my @aggs = @{$sel->[0]}; + my $num_aggs = scalar (@aggs); + + my @vals1 = get_values ($sel, $key1); + my @vals2 = get_values ($sel, $key2); + + @vals1 = sort (@vals1); + @vals2 = sort (@vals2); + + my %grand_total = (); + for (@aggs) + { + my $query = {}; + if ($key3 and $val3) + { + $query->{$key3} = $val3; + } + $grand_total{$_} = receive ($sel, $_, $query); + } + + my $target = ''; + if ($val3) + { + my $tmpval = $val3; + $tmpval =~ s/\W//g; + $text = qq#\n
\n#; + } + else + { + $text = qq#\n
\n#; + } + + my $graph_file = generate_graph ($sel, $key1, $key3, $val3); + if ($graph_file) + { + $text .= qq#

[graph]\n#; + + $graph_file = generate_graph ($sel, $key2, $key3, $val3); + $text .= qq# [graph]

\n#; + } + + $text .= qq#\n#; + + if ($key3 and $val3) + { + $text .= " \n"; + } + + my $agg_column_width = ''; + if ($num_aggs > 1) + { + $agg_column_width = qq# colspan="$num_aggs"#; + } + + # first line + $text .= qq# \n \n# + . ' \n" + . qq# \n# + . qq# \n# + . " \n"; + + # second line + $text .= " \n"; + for (@vals2) + { + $text .= qq# \n#; + } + $text .= qq# \n#; + + # third line (if appropriate only) + if ($num_aggs > 1) + { + $text .= " \n"; + + my $tmp = join ('', map { qq# \n# } (@aggs)); + $text .= $tmp x (2 + scalar (@vals2)); + + $text .= " \n"; + } + $text .= qq# \n \n"; + + my $this_is_the_first_line = 1; + for (@vals1) + { + my $val1 = $_; + + $text .= " \n" unless ($this_is_the_first_line); + $this_is_the_first_line = 0; + + $text .= qq# \n#; + + for (@vals2) + { + my $val2 = $_; + + my $query = { $key1 => $val1, $key2 => $val2 }; + if ($key3 and $val3) + { + $query->{$key3} = $val3; + } + + for (@aggs) + { + my $agg = $_; + + my $this_val = receive ($sel, $agg, $query); + my $print_val = convert ($agg, $this_val); + + $text .= ' \n"; + } + } + + my $query = { $key1 => $val1 }; + if ($key3 and $val3) + { + $query->{$key3} = $val3; + } + + my $tmp = ''; + for (@aggs) + { + my $this_val = receive ($sel, $_, $query); + my $print_val = convert ($_, $this_val); + + $text .= ' \n"; + $tmp .= ' \n"; + } + $text .= $tmp . " \n"; + } + # TODO 2003-05-10 13:00 + $text .= qq# \n \n#; + my @percentages = (); + for (@vals2) + { + my $val2 = $_; + + my $query = { $key2 => $val2 }; + if ($key3 and $val3) + { + $query->{$key3} = $val3; + } + + for (@aggs) + { + my $agg = $_; + + my $this_val = receive ($sel, $agg, $query); + my $print_val = convert ($agg, $this_val); + + $text .= ' \n"; + + my $pc = ($this_val ? sprintf ("%.1f%%", 100 * $this_val / $grand_total{$agg}) : ' '); + push (@percentages, $pc); + } + } + + for (@aggs) + { + my $agg = $_; + + my $print_val = convert ($agg, $grand_total{$agg}); + $text .= ' \n"; + } + + $text .= qq# \n# + . qq# \n# + . 
qq# \n# + . qq# \n#; + $text .= qq# \n# for (@percentages); + $text .= qq# \n# + . qq# \n# + . qq#
$val3
# . ucfirst ($key2) . "TotalPercent
$_
$_
# . ucfirst ($key1) . "
$val1' . ($print_val ? $print_val : ' ') . "' . ($print_val ? $print_val : ' ') . "' + . ($this_val ? sprintf ("%.1f%%", 100 * $this_val / $grand_total{$_}) : ' ') + . "
Total' . ($print_val ? $print_val : ' ') . "' . ($print_val ? $print_val : ' ') . " 
Percent$_ 
\n#; + + return ($text); +} diff --git a/lib/Yaala/Report/Core.pm b/lib/Yaala/Report/Core.pm new file mode 100644 index 0000000..3ebf979 --- /dev/null +++ b/lib/Yaala/Report/Core.pm @@ -0,0 +1,42 @@ +package Yaala::Report::Core; + +use strict; +use warnings; +use vars qw#$OUTPUTDIR#; + +use Exporter; +use Yaala::Config qw#get_config#; + +@Yaala::Report::Core::EXPORT_OK = qw#$OUTPUTDIR#; +@Yaala::Report::Core::ISA = ('Exporter'); + +my $VERSION = '$Id: Core.pm,v 1.5 2003/12/07 14:53:30 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +$OUTPUTDIR = get_config ('directory'); +if ($OUTPUTDIR) +{ + $OUTPUTDIR =~ s#/$##; + if (!-d $OUTPUTDIR) + { + print STDERR $/, __FILE__, qq#: Directory "$OUTPUTDIR" does not exist.\n#; + exit (1); + } + $OUTPUTDIR .= '/'; +} +elsif (-d 'reports') +{ + $OUTPUTDIR = 'reports/'; +} +else +{ + print STDERR $/, __FILE__, ": Unknown output directory. Using current ", + "directory instead!", + $/, __FILE__, ": To abort press CTRL+c within the next 10 secons"; + + sleep 10; + + $OUTPUTDIR = './'; +} + +return (1); diff --git a/lib/Yaala/Report/GDGraph.pm b/lib/Yaala/Report/GDGraph.pm new file mode 100644 index 0000000..eebde55 --- /dev/null +++ b/lib/Yaala/Report/GDGraph.pm @@ -0,0 +1,329 @@ +package Yaala::Report::GDGraph; + +use strict; +use warnings; +use vars qw#$GRAPH_WIDTH $GRAPH_HEIGHT#; + +use Exporter; +use Yaala::Data::Core qw#get_values receive#; +use Yaala::Config qw#get_config#; +use Yaala::Html qw#get_filename get_title#; +use Yaala::Report::Core qw#$OUTPUTDIR#; + +@Yaala::Report::GDGraph::EXPORT_OK = qw#generate_graph $GRAPH_WIDTH $GRAPH_HEIGHT#; +@Yaala::Report::GDGraph::ISA = ('Exporter'); + +$GRAPH_WIDTH = 500; +$GRAPH_HEIGHT = 250; + +our $HAVE_GD_GRAPH = 0; +our $MAX_VALUES = 25; +our $WANT_GRAPHS = 0; + +my $VERSION = '$Id: GDGraph.pm,v 1.9 2003/12/07 14:53:30 octo Exp $'; +print STDERR $/, __FILE__, ": $VERSION" if ($::DEBUG); + +eval "use GD::Graph::bars;"; +if (!$@) +{ + $HAVE_GD_GRAPH = 1; + 
print STDERR ' - GD::Graph is installed' if ($::DEBUG); +} +else +{ + print STDERR ' - GD::Graph is NOT installed' if ($::DEBUG); +} + +$WANT_GRAPHS = $HAVE_GD_GRAPH; + +if (get_config ('graph_height')) +{ + my $height = get_config ('graph_height'); + $height =~ s/\D//g; + + if (($height > 100) and ($height < 1000)) + { + $GRAPH_HEIGHT = $height; + } + else + { + print STDERR $/, __FILE__, ": ``$height'' is not a valid value for ``graph_height'' and will be ignored."; + } +} + +if (get_config ('graph_width')) +{ + my $width = get_config ('graph_width'); + $width =~ s/\D//g; + + if (($width > 100) and ($width < 1000)) + { + $GRAPH_WIDTH = $width; + $MAX_VALUES = int ($GRAPH_WIDTH / 20); + } + else + { + print STDERR $/, __FILE__, ": ``$width'' is not a valid value for ``graph_width'' and will be ignored."; + } +} + +if (get_config ('print_graphs')) +{ + my $want = lc (get_config ('print_graphs')); + if ($want eq 'no' or $want eq 'false' or $want eq 'off') + { + $WANT_GRAPHS = 0; + } + elsif ($want eq 'yes' or $want eq 'true' or $want eq 'on') + { + if (!$HAVE_GD_GRAPH) + { + print STDERR $/, __FILE__, ": You've set ``print_graphs'' to ``$want''.", + $/, __FILE__, ' However, the graphs cannot be genereted, because GD::Graph cannot be found.', + $/, __FILE__, ' Please go to your nearest CPAN-mirror and install it first.', + $/, __FILE__, ' This config-option will be ignored.'; + } + } + elsif ($want eq 'auto' or $want eq 'automatic') + { + # do nothing.. Already been done. 
+ } + else + { + print STDERR $/, __FILE__, ": You've set ``print_graphs'' to ``$want''.", + $/, __FILE__, ' This value is not understood and is being ignored.'; + } +} + +if ($::DEBUG & 0x100) +{ + print STDERR $/, __FILE__, ': Size: ', $GRAPH_WIDTH, 'x', $GRAPH_HEIGHT, + "; Max number of values: $MAX_VALUES"; +} + +return (1); + +sub generate_graph +{ + my $sel = shift; + my $key = shift; + + my $where_key = shift; + my $where_val = shift; + + return ('') unless ($HAVE_GD_GRAPH and $WANT_GRAPHS); + + if (!defined ($where_key) or !defined ($where_val) + or !$where_key or !$where_val) + { + $where_key = ''; + $where_val = ''; + } + + my @aggs = @{$sel->[0]}; + my $num_aggs = scalar (@aggs); + + my $filename = get_filename ($sel); + { + my $replacement = "__$key"; + if ($where_key) + { + $replacement .= "__$where_key" . "_$where_val"; + } + $replacement =~ s/\W+/_/g; + $replacement .= '.png'; + + $filename =~ s/\.html$/$replacement/; + } + + my @key_values = get_values ($sel, $key); + @key_values = sort (@key_values); + + my @agg_values = get_agg_values ($sel, $key, \@key_values, $where_key, $where_val); + + if (scalar (@key_values) > $MAX_VALUES) + { + discard_values (\@key_values, \@agg_values); + } + + for (@key_values) + { + next if (length ($_) < 20); + + substr ($_, 17) = ' ..'; + } + + my @data_set = (\@key_values, @agg_values); + + my $title = join (', ', map { ucfirst ($_) } (@aggs)) . ' by ' . ucfirst ($key); + if ($where_val) { $title .= ' for ' . 
$where_val; } + + print STDERR $/, __FILE__, qq#: Generating image "$title" [$filename]# + if ($::DEBUG & 0x100); + + my $graph = GD::Graph::bars->new ($GRAPH_WIDTH, $GRAPH_HEIGHT); + $graph->set + ( + title => $title, + x_label => ucfirst ($key), + y_label => 'Percent', + + x_labels_vertical => 1, + x_label_position => 1, + long_ticks => 1, + +# logo => 'reports/logo.png', + transparent => 1, + shadow_depth => 2, + + fgclr => 'lgray', + bgclr => 'white', + dclrs => [ qw(lgray gray dgray) ], + borderclrs => [ qw(black black black) ], + shadowclr => 'gray', + labelclr => 'black', + axislabelclr => 'black', + legendclr => 'black', + valuesclr => 'black', + textclr => 'black' + ); + + if ($num_aggs > 1) + { + $graph->set (legend_placement => 'BR'); + $graph->set_legend (map { ucfirst ($_) } (@aggs)); + } + + if (open (IMG, "> $OUTPUTDIR$filename")) + { + binmode IMG; + print IMG $graph->plot(\@data_set)->png; + close IMG; + } + else + { + print STDERR $/, __FILE__, ": Unable to open ``$filename'': $!"; + $filename = undef; + } + + return ($filename); +} + +sub discard_values +{ + my $key_array = shift; + my $val_array = shift; + + my @orig_sort = @$key_array; + my $num_values = scalar (@$key_array); + + return (1) if ($num_values < $MAX_VALUES); + + my %vals_by_key = (); + my %tmp_hash = (); + + my $i; + for ($i = 0; $i < $num_values; $i++) + { + my $key = shift (@$key_array); + my @vals = (); + my $sum = 0; + + for (@$val_array) + { + my $tmp = shift (@$_); + push (@vals, $tmp); + $sum += $tmp; + } + + $vals_by_key{$key} = \@vals; + $tmp_hash{$key} = $sum; + } + + my @small_sorted = sort { $tmp_hash{$b} <=> $tmp_hash{$a} } (keys (%tmp_hash)); + + for ($i = 0; $i < $MAX_VALUES; $i++) + { + shift (@small_sorted); + } + + for (@orig_sort) + { + my $this_key = $_; + if (grep { $_ eq $this_key } (@small_sorted)) + { + #$other += $tmp_hash{$this_key}; + } + else + { + push (@$key_array, $this_key); + my $vals = $vals_by_key{$this_key}; + for (@$val_array) + { + my $val = 
shift (@$vals); + push (@$_, $val); + } + } + } +} + +sub get_agg_values +{ + my $sel = shift; + my $key = shift; + my $key_values = shift; + + my $where_key = ''; + my $where_val = ''; + + if (@_) + { + $where_key = shift; + $where_val = shift; + } + + my @aggs = @{$sel->[0]}; + my @agg_values = (); + + my %max_val = (); + + for (@aggs) + { + my $agg = $_; + my @tmp = (); + $max_val{$agg} = 0; + + my $grand_total = 0; + #if (scalar (@aggs) > 1) + { + my %query = (); + if ($where_key) { $query{$where_key} = $where_val; } + + $grand_total = receive ($sel, $agg, {}); + } + + for (@$key_values) + { + my %query = ($key => $_); + if ($where_key) { $query{$where_key} = $where_val; } + + my $sum = receive ($sel, $agg, \%query); + + if ($grand_total) + { + $sum = 100 * $sum / $grand_total; + } + + push (@tmp, $sum); + + if ($sum > $max_val{$agg}) + { + $max_val{$agg} = $sum; + } + } + + push (@agg_values, \@tmp); + } + + return (@agg_values); +} diff --git a/packaging/yaala.cron b/packaging/yaala.cron new file mode 100644 index 0000000..90c561f --- /dev/null +++ b/packaging/yaala.cron @@ -0,0 +1,8 @@ +#!/bin/bash +# Generate a report, if the logfile is found. + +if [[ -s /var/log/httpd/access_log ]] ; then + /var/lib/yaala/yaala --config common_log.conf +fi + +exit 0 diff --git a/packaging/yaala.spec b/packaging/yaala.spec new file mode 100644 index 0000000..9cb4ed5 --- /dev/null +++ b/packaging/yaala.spec @@ -0,0 +1,71 @@ +%define ver 0.7.2 +Name: yaala +Summary: A very flexible log file analysis program for a variety of logfiles. +Group: Applications/Internet +Version: %{ver} +Release: 1 +Source0: http://yaala.org/files/%{name}-%{ver}.tar.bz2 +URL: http://yaala.org/ +License: GPL +Requires: perl >= 5.005, webserver +AutoReqProv: no +BuildArch: noarch +Buildroot: %{_tmppath}/%{name}-root +Packager: Florian octo Forster + +%description +yaala parses logfiles and generates very detailed statistics in HTML +format. 
The information one will get can be selected by using SQL-like +expressions, which provide filtering with relational operators as well as +regular expressions. It includes input parsers for the Common Log Format, +NCSA logs, Squid access logs, the xferlog format, bind9's query logs, and +postfix entries in the maillog. + +%prep +%setup + +%install +rm -fr $RPM_BUILD_ROOT + +mkdir -p $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala/Data \ + $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala/Parser \ + $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala/Report + +mkdir -p $RPM_BUILD_ROOT/var/www/html/usage \ + $RPM_BUILD_ROOT/etc/cron.daily + +install -m 555 yaala $RPM_BUILD_ROOT/var/lib/yaala + +install -m 444 lib/Yaala/*.pm $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala +install -m 444 lib/Yaala/Data/*.pm $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala/Data +install -m 444 lib/Yaala/Parser/*.pm $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala/Parser +install -m 444 lib/Yaala/Report/*.pm $RPM_BUILD_ROOT/var/lib/yaala/lib/Yaala/Report + +install -m 644 sample_configs/common_log.conf $RPM_BUILD_ROOT/var/lib/yaala +install -m 644 webserver.config $RPM_BUILD_ROOT/var/lib/yaala +install -m 444 reports/*.png reports/*.css $RPM_BUILD_ROOT/var/www/html/usage +install -m 755 packaging/yaala.cron $RPM_BUILD_ROOT/etc/cron.daily/00yaala + +%clean +rm -fr $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%doc AUTHORS CHANGELOG COPYING README README.persistency README.selections +%config(noreplace) /etc/cron.daily/00yaala +/var/lib/yaala +/var/www/html/usage + +%changelog +* Sun Dec 07 2003 Florian Forster 0.7.2 +- Modules have been moved to another directory + +* Thu Sep 25 2003 Florian Forster 0.7.1 +- Changed URLs to point to yaala.org +- rebuilt for version 0.7.1 + +* Tue Aug 19 2003 Florian Forster 0.7.0 +- rebuilt for version 0.7.0 + +* Thu Jun 05 2003 Florian Forster 0.6.7 +- Initial build. 
diff --git a/reports/dot-dark.png b/reports/dot-dark.png new file mode 100644 index 0000000000000000000000000000000000000000..1a75fcd439b0a0138c252438b1693cf18c5977fa GIT binary patch literal 82 zcmeAS@N?(olHy`uVBq!ia0vp^j3CUx0wlM}@Gt=>rX+877Y2q^y~;*F9=E59V@SoV drX+877Y2q^y~;*F9=E59V@SoV bNS%G}EByV>Y zhW{YAVDIwDKoQOYkH}&M25un`X1sK_?hjCqy~NYkmHiSIH=m5mx9TM}K%onsE{-7; zx88;a^05f=+{a8_}d*TecQdOE=Vw2{{ zqkUQaUCg?u zdhU|Ugb7)~tf#LvsxpXZ8XT*6WYO|Lc;&QmErmteKj#$3Ycgbo3E%SE(Z)3|sz0mb z^x}m_J5EGr>tEGb%=Gt|nrhY&#w#ms8igzl*^r}uVe#fD8HfCXH_xv4r8J9e!57=< d- # +# Contributions are listed in AUTHORS # +########################################################################## + +BEGIN +{ + if ($0 =~ m#^(.*)[/\\]#) { chdir ($1); } + + unshift (@::INC, 'lib'); + +# 0x010: lib/Data/Core.pm +# 0x020: lib/Data/Setup.pm +# 0x040: lib/Data/Convert.pm +# 0x080: lib/Data/Core.pm (dump any data stored!) +# 0x100: lib/Report/GDGraph.pm +# 0x200: lib/Data/Persistent.pm + $::DEBUG = 0x0000; +} + +use strict; +use warnings; +use vars qw( + $DEBUG + $EXTRA + + $NAME + $VERSION + $HOMEPAGE +); + +use Carp; +use Yaala::Config qw#get_config parse_argv read_config#; + +$NAME = 'yaala'; +$VERSION = '0.7.3'; +$HOMEPAGE = 'http://yaala.org/'; + +if ($DEBUG) +{ + select STDOUT; + $| = 1; +} + +$EXTRA = {}; + +print STDERR $/, __FILE__, ': $Id: yaala,v 1.17 2004/11/10 10:07:43 octo Exp $' if ($DEBUG); + +parse_argv (@ARGV); +read_config (get_config ('config') ? 
get_config ('config') : 'config'); + +unless (get_config ('input')) +{ + usage (); + exit (1); +} + +# report and data initialization needs parser module +my $logtype = get_config ('logtype'); +my $report = get_config ('report' ); +$logtype ||= 'Common'; +$report ||= 'Combined'; +$logtype = ucfirst (lc ($logtype)); +$report = ucfirst (lc ($report )); + +require "Yaala/Parser/$logtype.pm"; +require "Yaala/Report/$report.pm"; +import Yaala::Parser qw#parse extra#; +import Yaala::Report qw#generate#; + +print STDERR $/, __FILE__, ": Accumulating data.." if ($DEBUG); + +my $num_read_files = 0; + +for (get_config ('input')) +{ + #no strict 'refs'; + if (open (LOGFILE, '< ' . $_)) + { + print STDERR $/, __FILE__, qq#: Reading "$_"# if ($DEBUG); + $num_read_files++; + + parse ($_) while (); + + close LOGFILE; + } + else + { + print STDERR $/, __FILE__, qq#: Error opening "$_": $!#; + } +} +if (!$num_read_files) +{ + print STDERR $/, __FILE__, ": Could not read any files. Exiting.\n"; + exit (1); +} + +extra (); + +print STDERR $/, __FILE__, ': Generating pages..' if ($DEBUG); +generate (); + +print STDERR $/, __FILE__, ": Exiting.." if ($DEBUG); + +exit (0); + +################################################## +# end of main program # +#---=====================------------------------# +# surprised?? well, it's pretty short, cause all # +# the _real_ work is done in the the modules. # +# If you write a modul by your own: PLEASE send # +# me a copy so that i can include it in the # +# package. # +# And how about 12 modules at a time ? -- qmax # +# Awesome :) -- octo # +################################################## + +sub usage +{ + print STDOUT < ] file1 .. fileN + +Options: + --config Specify alternate config file + --directory yaala will write all generated files to this + directory (and overwrite existing ones without + prompting!) + --report Selects the report type to use + --logtype Specifies the type of logfiles to parse + --select Select statements. 
See README.selections + +You can prepend two dashes to every keyword in the config file and +configure yaala from the command line. +EOF + return (1); +} + +END +{ + print STDERR $/ if ($DEBUG); +} -- 2.11.0