|
24 | 24 | import org.jwat.warc.WarcWriterUncompressed;
|
25 | 25 |
|
26 | 26 | public class Warcrefs {
|
27 |
| - /* |
28 |
| - * This is the digest index, which looks like this: |
29 |
| - * |
30 |
| - * fn off len uri date digest digestExt copy referstoUri referstoDate |
31 |
| - */ |
32 |
| - private LineNumberInputStream digests; |
33 |
| - |
34 |
| - /* |
35 |
| - * This is the input file (duplicated) and the output |
36 |
| - * file(deduplicated). |
37 |
| - */ |
38 |
| - private FileInputStream dup; |
39 |
| - private FileOutputStream dedup; |
40 | 27 |
|
41 | 28 | private int curentRecordOff;
|
42 | 29 |
|
43 | 30 | public static void main(String[] args) {
|
| 31 | + try { |
| 32 | + // The BufferedReader provides a readLine() method. |
| 33 | + BufferedReader digestsReader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]))); |
44 | 34 |
|
45 |
| - // The BufferedReader provides a readLine() method. |
46 |
| - BufferedReader digestsReader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]))); |
47 |
| - |
48 |
| - // Each line will be split into fields. |
49 |
| - String[] digestLine; |
| 35 | + /* |
| 36 | + * Each line will be split into fields, where 'h' is the digest |
| 37 | + * (hash), and 'hx' is the digest extension (and other shorthands |
| 38 | + * are obvious): |
| 39 | + * |
| 40 | + * fn off len uri date h hx copy refsuri refsdate |
| 41 | + * 0 1 2 3 4 5 6 7 8 9 |
| 42 | + */ |
| 43 | + String[] digestLine; |
50 | 44 |
|
51 |
| - // While there are more files in directories. |
52 |
| - while (1) { |
53 |
| - digestLine = digestsReader.readLine().split(" "); |
| 45 | + // While there are more files in directories. |
| 46 | + while (true) { |
| 47 | + digestLine = digestsReader.readLine().split(" "); |
54 | 48 |
|
55 |
| - System.out.println(digestLine[1]); |
56 |
| - |
57 |
| - |
58 |
| - } |
59 | 49 |
|
60 |
| - /* |
| 50 | + //IF FN AND OFF EQUAL CURRENT FN AND OFF |
61 | 51 |
|
62 |
| - /* |
63 |
| - * The rest of this class is more like pseudocode that will not |
64 |
| - * compile. |
65 |
| - */ |
| 52 | + //IF COPY 1 |
66 | 53 |
|
67 |
| - currentRecordOff = 0; |
| 54 | + // fis.read |
| 55 | + // (b, 0, len); |
| 56 | + // fos.write |
| 57 | + // (b, 0, len); |
68 | 58 |
|
69 |
| - while(getNextDigest()) { |
70 |
| - if(fn == fn && off == off) { |
71 |
| - if(copy == 1) { |
72 |
| - dup.read(b, 0, len); |
73 |
| - dedup.write(b, 0, len); |
74 |
| - currentRecordOff += len; |
75 |
| - } else { |
76 |
| - wr.getNextRecord(); |
77 |
| - makeRevisitRecord(); |
78 |
| - ww.writeRecord(); |
79 |
| - } |
80 |
| - } |
| 59 | + // currentRecordOff += len; |
81 | 60 |
|
82 |
| - */ |
| 61 | + //IF COPY 2 |
| 62 | + |
| 63 | + // READ NEXT RECORD USING JWAT AND WRITE REVISIT RECORD. |
83 | 64 |
|
84 |
| - System.exit(0); |
85 |
| - } |
86 | 65 |
|
87 |
| - private getNextDigest() { |
| 66 | + // TODO: Implement this loop and remove the break. |
| 67 | + break; |
| 68 | + } |
| 69 | + } catch (Exception e) { |
| 70 | + e.printStackTrace(); |
| 71 | + } |
88 | 72 | }
|
89 | 73 | }
|
0 commit comments