1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
|
diff -r 6c4735564ddc src/Makefile.am
--- a/src/Makefile.am Sun Dec 21 06:50:09 2008 +0000
+++ b/src/Makefile.am Sun Dec 21 23:56:33 2008 +0000
@@ -28,6 +28,8 @@ dillo_SOURCES = \
bw.c \
cookies.c \
cookies.h \
+ adblock.c \
+ adblock.h \
auth.c \
auth.h \
colors.c \
diff -r 6c4735564ddc src/adblock.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/adblock.c Sun Dec 21 23:56:33 2008 +0000
@@ -0,0 +1,293 @@
+/*
+ * An ad blocker thing following the Adblock Plus syntax found at
+ * http://adblockplus.org/en/filters as it was in December 2008.
+ *
+ * This is not very tidy yet since I don't think it's in any danger of
+ * getting into the real tree without being changed into a dpi.
+ * Hence the conversational tone and everything...
+ *
+ * - Comment lines begin with '!'.
+ * - A "basic" rule has an implicit wildcard at each end. i.e., it's
+ * just looking for a substring. A '|' character before or after
+ * means turn off the wildcard behavior here.
+ * - A basic rule can contain wildcards. They may just mean '*'.
+ * I don't know whether '?' is supposed to be special.
+ * - I think they regard any rule of the form /something/ as a regexp rule.
+ *
+ * - Now you prefix your rule with "@@" if it's an "exception rule" that
+ * _prevents_ blocking.
+ * - And you can append '$' followed by comma-separated options.
+ * Mostly these specify what types of URLs should be filtered,
+ * i.e., image URL, script URL, etc. If the type is prefixed by a '~',
+ * it means "_don't_ filter this type".
+ * There is also an option for case-sensitive matching.
+ *
+ * LIMITATIONS
+ * 1. Is probably as slow as molasses.
+ * 2. Treats all non-regexp rules as case-sensitive.
+ * GNU's fnmatch does have a FNM_CASEFOLD, though...
+ * Just translating everything into regexps might be easiest in
+ * a way, but I doubt it would help the presumably-slow speed.
+ * 3. Doesn't handle element hiding.
+ */
+
+#include <ctype.h>
+
+#include <fnmatch.h>
+#include <regex.h>
+
+#include "msg.h"
+#include "adblock.h"
+
+
+/* Name of the rules file inside ~/.dillo/. A preference to point directly
+ * to the file used by Adblock Plus would be nice.
+ */
+#define ADBLOCK_FILENAME "adblock"
+
+
+typedef enum { /* bit flags OR'ed together in AdblockRule_t.flags */
+ ADBLOCK_ALLOW = 1 << 0, /* "@@" exception rule: a match prevents blocking */
+ ADBLOCK_REGEXP = 1 << 1, /* written as /text/; str is given to regcomp() */
+ ADBLOCK_MATCH_CASE = 1 << 2, /* "match-case" seen; regexps skip REG_ICASE */
+} AdblockFlag_t;
+
+typedef struct { /* one entry of the option-keyword lookup table */
+ const char *name; /* keyword as written after '$' in a rule */
+ AdblockType_t type; /* type bit selected by that keyword */
+} AdblockOption_t;
+
+typedef struct { /* one parsed filter rule */
+ char *str; /* fnmatch() pattern, or regcomp() source if ADBLOCK_REGEXP */
+ int flags; /* OR of AdblockFlag_t bits */
+ int types; /* OR of the AdblockType_t bits this rule applies to */
+} AdblockRule_t;
+
+/*
+ * Option keywords accepted after '$' (file-local). Types that don't apply
+ * to Dillo are omitted; ADBLOCK_DOCUMENT might serve for HTTP redirection.
+ */
+static const AdblockOption_t Options[] = {
+   {"image", ADBLOCK_IMAGE},
+   {"stylesheet", ADBLOCK_STYLESHEET},
+   {"document", ADBLOCK_DOCUMENT},
+};
+
+
+static Dlist *adblock_rules; /* AdblockRule_t items added by a_Adblock_init() */
+
+
+/*
+ * Parse the filter options that follow '$', e.g. "match-case, image,
+ * stylesheet", updating rule->flags and rule->types. A '~' before a type
+ * excludes it; with no type options the rule applies to every type.
+ *
+ * Return nonzero iff this rule is meaningful for Dillo. There's no sense
+ * in keeping a rule around that's only for XBL or DTD or whatever...
+ */
+static int Adblock_parse_filter_options(const char *options,
+                                        AdblockRule_t *rule)
+{
+   const int listlen = sizeof(Options) / sizeof(AdblockOption_t);
+   bool_t inverse = FALSE, types_seen = FALSE;
+   const char *ptr = options;
+   int i;
+
+   while (*ptr) {
+      /* <ctype.h> needs unsigned char values; plain char may be negative */
+      while (isspace((unsigned char)*ptr))
+         ptr++;
+      if (!dStrncasecmp(ptr, "match-case", 10)) {
+         rule->flags |= ADBLOCK_MATCH_CASE;
+         ptr += 10;
+      } else {
+         types_seen = TRUE;
+
+         if (*ptr == '~') {
+            if (!inverse) {
+               /* Initialize. First inverse seen (unless the user is mixing
+                * inverse and 'regular' type specifications, which seems
+                * inadvisable).
+                */
+               rule->types = ADBLOCK_ALL;
+            }
+            inverse = TRUE;
+            ptr++;
+         } else {
+            inverse = FALSE;
+         }
+         for (i = 0; i < listlen; i++) {
+            const char *name = Options[i].name;
+            size_t len = strlen(name);
+            /* match must end the word: not "images" or "image-x" */
+            if (!dStrncasecmp(ptr, name, len) &&
+                ptr[len] != '-' && !isalpha((unsigned char)ptr[len])) {
+               if (inverse) {
+                  rule->types &= ~Options[i].type;
+               } else {
+                  rule->types |= Options[i].type;
+               }
+               ptr += len;
+               break;
+            }
+         }
+      }
+      /* skip whatever is left of this option, and the ',' that ends it */
+      while (*ptr && *ptr != ',')
+         ptr++;
+      if (*ptr)
+         ptr++;
+   }
+   if (types_seen == FALSE) {
+      rule->types = ADBLOCK_ALL;
+   }
+   return (rule->types != 0);
+}
+
+/*
+ * Parse one line of the rules file (it gets stripped in place). A rule
+ * "with everything" might look like "@@|text|$~object,match-case";
+ * "/text/" is a regexp rule instead.
+ * Return a newly allocated rule, or NULL for a blank line, a '!' comment,
+ * or a rule whose options leave no type that Dillo handles.
+ */
+static AdblockRule_t *Adblock_parse_line(char *line)
+{
+   enum {BASIC_NO_WILDCARD, BASIC_WILDCARD, REGEXP_POSSIBLE};
+   int len, start;
+   const char *ptr = line;
+   Dstr *dstr;
+   AdblockRule_t *rule;
+   bool_t keep = TRUE;
+
+   dStrstrip(line);
+   if (*ptr == '\0' || *ptr == '!') {
+      return NULL;   /* empty or comment */
+   }
+   rule = dNew0(AdblockRule_t, 1);
+
+   if (*ptr == '@' && ptr[1] == '@') {
+      rule->flags = ADBLOCK_ALLOW;
+      ptr += 2;
+   }
+   if (*ptr == '/') {
+      start = REGEXP_POSSIBLE;
+   } else if (*ptr == '|') {
+      start = BASIC_NO_WILDCARD;
+      ptr++;
+   } else {
+      start = BASIC_WILDCARD;
+   }
+   len = strcspn(ptr, "|$");
+   dstr = dStr_new("");
+
+   /* len >= 2: a lone "/" can't be both delimiters; without the check len
+    * goes negative and ptr steps past the terminating NUL. */
+   if (start == REGEXP_POSSIBLE && len >= 2 && ptr[len-1] == '/' &&
+       ptr[len] != '|') {
+      /* /text/, and regexec() doesn't want the '/'s */
+      rule->flags |= ADBLOCK_REGEXP;
+      ptr++;
+      len -= 2;
+   } else if (start != BASIC_NO_WILDCARD) {
+      dStr_append_c(dstr, '*');   /* wildcard at beginning */
+   }
+   if (len > 0) {
+      dStr_append_l(dstr, ptr, len);
+      ptr += len;
+   }
+   if (*ptr == '|' || (rule->flags & ADBLOCK_REGEXP)) {
+      ptr++;
+   } else {
+      dStr_append_c(dstr, '*');   /* wildcard at end */
+   }
+   rule->str = dstr->str;
+   dStr_free(dstr, 0);
+
+   if (*ptr != '$') {
+      rule->types = ADBLOCK_ALL;
+   } else {
+      ptr++;
+      keep = Adblock_parse_filter_options(ptr, rule);
+   }
+   MSG("%s\n%s\nimg%d sty%d doc%d allow%d matchcase%d%s\n\n", line, rule->str,
+       rule->types & ADBLOCK_IMAGE, rule->types & ADBLOCK_STYLESHEET,
+       rule->types & ADBLOCK_DOCUMENT, rule->flags & ADBLOCK_ALLOW,
+       rule->flags & ADBLOCK_MATCH_CASE, (keep ? "" : "\nDISCARD!"));
+
+   if (!keep) {
+      dFree(rule->str);   /* the pattern is owned by the rule; don't leak it */
+      dFree(rule);
+      rule = NULL;
+   }
+   return rule;
+}
+
+/*
+ * Create the rule list and fill it from ADBLOCK_FILENAME in ~/.dillo/.
+ * If the file can't be opened, say so and leave the list empty.
+ */
+void a_Adblock_init()
+{
+   char *path, *text;
+   FILE *rules_file;
+
+   adblock_rules = dList_new(1);
+   path = dStrconcat(dGethomedir(), "/.dillo/", ADBLOCK_FILENAME, NULL);
+   rules_file = fopen(path, "r");
+   if (rules_file == NULL) {
+      MSG("adblock: Can't open rules file %s\n", path);
+   } else {
+      for (text = dGetline(rules_file); text; text = dGetline(rules_file)) {
+         AdblockRule_t *parsed = Adblock_parse_line(text);
+         if (parsed != NULL)
+            dList_append(adblock_rules, parsed);
+         dFree(text);
+      }
+      fclose(rules_file);
+   }
+   dFree(path);
+}
+
+/*
+ * May this URL be loaded as a resource of type t?
+ * Rules are tried in list order; a matching "@@" rule permits the URL at
+ * once, otherwise any matching blocking rule forbids it.
+ * Return TRUE if permitted, FALSE if blocked.
+ */
+bool_t a_Adblock_permitted(const DilloUrl *url, AdblockType_t t)
+{
+   int i;
+   AdblockRule_t *rule;
+   bool_t allow = TRUE;
+
+   for (i = 0; (rule = dList_nth_data(adblock_rules, i)); i++) {
+      /* reset per rule: a failed regcomp() must not reuse the last result */
+      bool_t match = FALSE;
+      if (!(rule->types & t))
+         continue;
+      if (rule->flags & ADBLOCK_REGEXP) {
+         regex_t buffer;
+         int cflags = REG_NOSUB;
+         if (!(rule->flags & ADBLOCK_MATCH_CASE))
+            cflags |= REG_ICASE;
+         if (regcomp(&buffer, rule->str, cflags)) {
+            MSG("regcomp didn't like rule string %s\n", rule->str);
+         } else {
+            match = (regexec(&buffer, URL_STR(url), 0, NULL, 0) == 0);
+            regfree(&buffer);
+         }
+      } else {
+         match = (fnmatch(rule->str, URL_STR(url), 0) == 0);
+      }
+      if (match) {
+         if (rule->flags & ADBLOCK_ALLOW) {
+            allow = TRUE;
+            break;   /* overrides any rule to block */
+         }
+         allow = FALSE;
+      }
+   }
+   return allow;
+}
+
diff -r 6c4735564ddc src/adblock.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/adblock.h Sun Dec 21 23:56:33 2008 +0000
@@ -0,0 +1,38 @@
+#ifndef __ADBLOCK_H__
+#define __ADBLOCK_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include "url.h"
+
+typedef enum { /* kinds of request a rule can apply to; values are bit flags */
+ ADBLOCK_NONE = 0,
+ ADBLOCK_IMAGE = 1 << 0,
+ ADBLOCK_STYLESHEET = 1 << 1,
+ ADBLOCK_DOCUMENT = 1 << 2,
+ ADBLOCK_ALL = ADBLOCK_DOCUMENT | (ADBLOCK_DOCUMENT - 1), /* bits 0..2 set */
+#if 0
+ /* Not needed yet. NOTE(review): ADBLOCK_ALL would not cover these bits. */
+ ADBLOCK_SCRIPT = 1 << 3,
+ ADBLOCK_BACKGROUND = 1 << 4,
+ ADBLOCK_OBJECT = 1 << 5,
+ ADBLOCK_XBL = 1 << 6,
+ ADBLOCK_PING = 1 << 7,
+ ADBLOCK_XMLHTTPREQUEST = 1 << 8,
+ ADBLOCK_OBJECT_SUBREQUEST = 1 << 9,
+ ADBLOCK_DTD = 1 << 10,
+ ADBLOCK_SUBDOCUMENT = 1 << 11,
+ ADBLOCK_OTHER = 1 << 12,
+#endif
+} AdblockType_t;
+
+
+void a_Adblock_init(); /* loads the rules a_Adblock_permitted() checks */
+bool_t a_Adblock_permitted(const DilloUrl *url, AdblockType_t t);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* !__ADBLOCK_H__ */
diff -r 6c4735564ddc src/dillo.cc
--- a/src/dillo.cc Sun Dec 21 06:50:09 2008 +0000
+++ b/src/dillo.cc Sun Dec 21 23:56:33 2008 +0000
@@ -46,6 +46,7 @@
#include "dicache.h"
#include "cookies.h"
#include "auth.h"
+#include "adblock.h"
/*
* Command line options structure
@@ -269,6 +270,7 @@ int main(int argc, char **argv)
a_Bw_init();
a_Cookies_init();
a_Auth_init();
+ a_Adblock_init();
/* command line options override preferences */
if (options_got & DILLO_CLI_FULLWINDOW)
diff -r 6c4735564ddc src/html.cc
--- a/src/html.cc Sun Dec 21 06:50:09 2008 +0000
+++ b/src/html.cc Sun Dec 21 23:56:33 2008 +0000
@@ -36,6 +36,7 @@
#include "nav.h"
#include "menu.hh"
#include "prefs.h"
+#include "adblock.h"
#include "capi.h"
#include "html.hh"
#include "html_common.hh"
@@ -2136,8 +2137,9 @@ DilloImage *a_Html_add_new_image(DilloHt
style_attrs);
}
- load_now = a_UIcmd_get_images_enabled(html->bw) ||
- (a_Capi_get_flags(url) & CAPI_IsCached);
+ load_now = (a_Capi_get_flags(url) & CAPI_IsCached) ||
+ (a_UIcmd_get_images_enabled(html->bw) &&
+ a_Adblock_permitted(url, ADBLOCK_IMAGE));
Html_add_new_linkimage(html, &url, load_now ? NULL : Image);
if (load_now)
Html_load_image(html->bw, url, Image);
|