• Skip to content
  • Skip to link menu
KDE 4.1 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KHTML

khtml_filter.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE project
00002 
00003    Copyright (C) 2005 Ivor Hewitt     <ivor@kde.org>
00004    Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
00005    Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Library General Public
00009    License as published by the Free Software Foundation; either
00010    version 2 of the License, or (at your option) any later version.
00011 
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Library General Public License for more details.
00016 
00017    You should have received a copy of the GNU Library General Public License
00018    along with this library; see the file COPYING.LIB.  If not, write to
00019    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020    Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "khtml_filter_p.h"
00024 #include <QDebug>
00025 
00026 // rolling hash parameters
00027 #define HASH_P (2003)
00028 #define HASH_Q (8191)
00029 // HASH_MOD = (HASH_P^7) & HASH_Q
00030 #define HASH_MOD (6843)
00031 
00032 namespace khtml {
00033 
00034 void FilterSet::addFilter(const QString& filterStr)
00035 {
00036     QString filter = filterStr;
00037     
00038     if (filter.startsWith(QLatin1Char('!')))
00039         return;
00040 
00041     // Strip leading @@
00042     int first = 0;
00043     int last  = filter.length() - 1;
00044     if (filter.startsWith(QLatin1String("@@")))
00045         first = 2;
00046         
00047     // Strip options, we ignore them for now.
00048     int dollar = filter.lastIndexOf(QLatin1Char('$'));
00049     if (dollar != -1)
00050         last = dollar - 1;
00051     
00052     // Perhaps nothing left?
00053     if (first > last)
00054         return;
00055         
00056     filter = filter.mid(first, last - first + 1);
00057     
00058     // Is it a regexp filter?
00059     if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/')))
00060     {
00061         QString inside = filter.mid(1, filter.length()-2);
00062         QRegExp rx(inside);
00063         reFilters.append(rx);
00064 //         qDebug() << "R:" << inside;
00065     }
00066     else
00067     {
00068         // Nope, a wildcard one.
00069         // Note: For these, we also need to handle |.
00070         
00071         // Strip wildcards at the ends
00072         first = 0;
00073         last  = filter.length() - 1;
00074         
00075         while (first < filter.length() && filter[first] == QLatin1Char('*'))
00076             ++first;
00077             
00078         while (last >= 0 && filter[last] == QLatin1Char('*'))
00079             --last;
00080             
00081         if (first > last)
00082             filter = QLatin1String("*"); // erm... Well, they asked for it.
00083         else
00084             filter = filter.mid(first, last - first + 1);
00085             
00086         // Now, do we still have any wildcard stuff left?
00087         if (filter.contains("*") || filter.contains("?")) 
00088         {
00089 //             qDebug() << "W:" << filter;
00090             QRegExp rx;
00091 
00092             rx.setPatternSyntax(QRegExp::Wildcard);
00093             rx.setPattern(filter);
00094             reFilters.append(rx);        
00095         }
00096         else
00097         {
00098             // Fast path
00099             stringFiltersMatcher.addString(filter);
00100         }
00101     }
00102 }
00103 
00104 bool FilterSet::isUrlMatched(const QString& url)
00105 {
00106     if (stringFiltersMatcher.isMatched(url))
00107         return true;
00108 
00109     for (int c = 0; c < reFilters.size(); ++c)
00110     {
00111         if (url.contains(reFilters[c]))
00112             return true;
00113     }
00114 
00115     return false;
00116 }
00117 
00118 void FilterSet::clear()
00119 {
00120     reFilters.clear();
00121     stringFiltersMatcher.clear();
00122 }
00123 
00124 
00125 void StringsMatcher::addString(const QString& pattern)
00126 {
00127     if (pattern.length() < 8) {
00128         // hanlde short string differently
00129         shortStringFilters.append(pattern);
00130     } else {
00131         // use modified Rabin-Karp's algorithm with 8-length string hash
00132         // i.e. store hash of first 8 chars in the HashMap for fast look-up
00133         stringFilters.append(pattern);
00134         int ind = stringFilters.size() - 1;
00135         int current = 0;
00136 
00137         // compute hash using rolling hash
00138         // hash for string: x0,x1,x2...xn-1 will be:
00139         // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
00140         // where p and q some wisely-chosen integers
00141         for (int k = 0; k < 8; ++k)
00142             current = (current * HASH_P + pattern[k].unicode()) & HASH_Q;
00143 
00144         // insert computed hash value into HashMap
00145         WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
00146         if (it == stringFiltersHash.end()) {
00147             QVector<int> list;
00148             list.append(ind);
00149             stringFiltersHash.add(current + 1, list);
00150         } else {
00151             it->second.append(ind);
00152         }
00153     }
00154 }
00155 
00156 bool StringsMatcher::isMatched(const QString& str) const
00157 {
00158     // check short strings first
00159     for (int i = 0; i < shortStringFilters.size(); ++i) {
00160         if (str.contains(shortStringFilters[i]))
00161             return true;
00162     }
00163 
00164     int len = str.length();
00165     int k;
00166 
00167     int current = 0;
00168     // compute hash for first 8 characters
00169     for (k = 0; k < 8 && k < len; ++k)
00170         current = (current * HASH_P + str[k].unicode()) & HASH_Q;
00171 
00172     WTF::HashMap<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
00173     // main Rabin-Karp's algorithm loop
00174     for (k = 7; k < len; ++k) {
00175         // look-up the hash in the HashMap and check all strings
00176         WTF::HashMap<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
00177 
00178         // check possible strings
00179         if (it != hashEnd) {
00180             for (int j = 0; j < it->second.size(); ++j) {
00181                 int index = it->second[j];
00182                 int flen = stringFilters[index].length();
00183                 if (k - 8 + flen < len && stringFilters[index] == str.midRef(k - 7, flen))
00184                     return true;
00185             }
00186         }
00187 
00188         // roll the hash if not at the end
00189         if (k + 1 < len)
00190             current = (HASH_P * ((current + HASH_Q + 1 - ((HASH_MOD * str[k - 7].unicode()) & HASH_Q)) & HASH_Q) + str[k + 1].unicode()) & HASH_Q;
00191     }
00192 
00193     return false;
00194 }
00195 
00196 }
00197 
00198 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;

KHTML

Skip menu "KHTML"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • KIO
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.6
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal