1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.archive.wayback.accesscontrol.robotstxt;
20
21 import java.io.Serializable;
22 import java.util.concurrent.ConcurrentSkipListSet;
23
24
25
26
27
/**
 * Holds the path rules and crawl delay collected for one robots.txt
 * user-agent section and answers whether a given path may be fetched.
 *
 * <p>Rules are plain path prefixes. For a queried path, the longest matching
 * disallow prefix is compared with the longest matching allow prefix; the
 * path is refused only when the disallow match is strictly longer.
 *
 * <p>The two rule sets are concurrent collections; {@code crawlDelay} is a
 * plain field with no synchronization of its own.
 */
public class RobotsDirectives implements Serializable {
    private static final long serialVersionUID = 5386542759286155383L;

    /** Path prefixes that are disallowed; never contains the empty string. */
    ConcurrentSkipListSet<String> disallows = new ConcurrentSkipListSet<String>();
    /** Path prefixes that are explicitly allowed. */
    ConcurrentSkipListSet<String> allows = new ConcurrentSkipListSet<String>();
    /** Crawl delay; starts at -1 (presumably "not specified" - confirm with callers). */
    float crawlDelay = -1;

    /**
     * Tells whether the given path is permitted by these directives.
     *
     * @param path the path to test; must not be {@code null}
     * @return {@code false} only when the longest matching disallow prefix is
     *         strictly longer than the longest matching allow prefix;
     *         {@code true} otherwise (ties and "no match at all" are allowed)
     */
    public boolean allows(String path) {
        return longestPrefixLength(disallows, path) <= longestPrefixLength(allows, path);
    }

    /**
     * Returns the length of the longest element of {@code prefixSet} that is
     * a prefix of {@code str}.
     *
     * <p>Looking only at {@code prefixSet.floor(str)} is not enough: the
     * floor may be a sibling entry that merely sorts before {@code str}
     * (e.g. {@code "/a/b"} for {@code "/a/c"}) and hides a shorter entry that
     * really is a prefix ({@code "/a"}). Every stored prefix of {@code str}
     * sorts at or before the text that the floor and {@code str} have in
     * common, so the search is retried with that common text until a prefix
     * is found or no candidate remains. Each retry uses a strictly shorter
     * key, so the loop always terminates.
     *
     * @param prefixSet sorted set of candidate prefixes
     * @param str the string to match; must not be {@code null}
     * @return length of the longest matching prefix, or 0 when none matches
     */
    protected int longestPrefixLength(ConcurrentSkipListSet<String> prefixSet,
            String str) {
        String candidate = prefixSet.floor(str);
        while (candidate != null) {
            if (str.startsWith(candidate)) {
                return candidate.length();
            }
            // candidate is not a prefix, so the shared part is strictly
            // shorter than candidate and the next floor() moves downwards.
            int shared = commonPrefixLength(candidate, str);
            candidate = prefixSet.floor(str.substring(0, shared));
        }
        return 0;
    }

    /** Counts how many leading characters {@code a} and {@code b} share. */
    private static int commonPrefixLength(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int i = 0;
        while (i < limit && a.charAt(i) == b.charAt(i)) {
            i++;
        }
        return i;
    }

    /**
     * Registers a disallowed path prefix.
     *
     * @param path the prefix to disallow; must not be {@code null}. An empty
     *        path is ignored (in robots.txt an empty Disallow value blocks
     *        nothing), so it is never stored as a rule.
     */
    public void addDisallow(String path) {
        if (path.length() == 0) {
            // Storing "" would add a rule that is a prefix of every path.
            return;
        }
        disallows.add(path);
    }

    /**
     * Registers an explicitly allowed path prefix.
     *
     * @param path the prefix to allow; must not be {@code null}
     */
    public void addAllow(String path) {
        allows.add(path);
    }

    /**
     * Sets the crawl delay.
     *
     * @param i the new crawl delay value
     */
    public void setCrawlDelay(float i) {
        crawlDelay = i;
    }

    /**
     * Returns the crawl delay.
     *
     * @return the stored crawl delay; -1 if it was never set
     */
    public float getCrawlDelay() {
        return crawlDelay;
    }
}