Skip to content

Commit 0588436

Browse files
Digests line reading
(Committing again because the actual changes were missing from the previous commit.) The program now reads a line from the digests file and splits it into fields. Also in this commit, the pseudocode has been cleaned up. This code compiles.
1 parent 8b04302 commit 0588436

File tree

2 files changed

+32
-48
lines changed

2 files changed

+32
-48
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
<configuration>
5555
<archive>
5656
<manifest>
57-
<mainClass>org.bibalex.warcrefs.WarcRefs</mainClass>
57+
<mainClass>org.bibalex.warcrefs.Warcrefs</mainClass>
5858
</manifest>
5959
</archive>
6060
<descriptorRefs>

src/main/java/org/bibalex/warcrefs/Warcrefs.java

Lines changed: 31 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,66 +24,50 @@
2424
import org.jwat.warc.WarcWriterUncompressed;
2525

2626
public class Warcrefs {
27-
/*
28-
* This is the digest index, which looks like this:
29-
*
30-
* fn off len uri date digest digestExt copy referstoUri referstoDate
31-
*/
32-
private LineNumberInputStream digests;
33-
34-
/*
35-
* This is the input file (duplicated) and the output
36-
* file(deduplicated).
37-
*/
38-
private FileInputStream dup;
39-
private FileOutputStream dedup;
4027

4128
private int curentRecordOff;
4229

4330
public static void main(String[] args) {
31+
try {
32+
// The BufferedReader provides a readLine() method.
33+
BufferedReader digestsReader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
4434

45-
// The BufferedReader provides a readLine() method.
46-
BufferedReader digestsReader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
47-
48-
// Each line will be split into fields.
49-
String[] digestLine;
35+
/*
36+
* Each line will be split into fields, where 'h' is the digest
37+
* (hash), and 'hx' is the digest extension (and other shorthands
38+
* are obvious):
39+
*
40+
* fn off len uri date h hx copy refsuri refsdate
41+
* 0 1 2 3 4 5 6 7 8 9
42+
*/
43+
String[] digestLine;
5044

51-
// While there are more files in directories.
52-
while (1) {
53-
digestLine = digestsReader.readLine().split(" ");
45+
// While there are more files in directories.
46+
while (true) {
47+
digestLine = digestsReader.readLine().split(" ");
5448

55-
System.out.println(digestLine[1]);
56-
57-
58-
}
5949

60-
/*
50+
// IF FN AND OFF EQUAL CURRENT FN AND OFF
6151

62-
/*
63-
* The rest of this class is more like pseudocode that will not
64-
* compile.
65-
*/
52+
//IF COPY 1
6653

67-
currentRecordOff = 0;
54+
// fis.read
55+
// (b, 0, len);
56+
// fos.write
57+
// (b, 0, len);
6858

69-
while(getNextDigest()) {
70-
if(fn == fn && off == off) {
71-
if(copy == 1) {
72-
dup.read(b, 0, len);
73-
dedup.write(b, 0, len);
74-
currentRecordOff += len;
75-
} else {
76-
wr.getNextRecord();
77-
makeRevisitRecord();
78-
ww.writeRecord();
79-
}
80-
}
59+
// currentRecordOff += len;
8160

82-
*/
61+
//IF COPY 2
62+
63+
// READ NEXT RECORD USING JWAT AND WRITE REVISIT RECORD.
8364

84-
System.exit(0);
85-
}
8665

87-
private getNextDigest() {
66+
// TODO: Implement this loop and remove the break.
67+
break;
68+
}
69+
} catch (Exception e) {
70+
e.printStackTrace();
71+
}
8872
}
8973
}

0 commit comments

Comments
 (0)