-
Notifications
You must be signed in to change notification settings - Fork 2
/
warc_test.go
85 lines (81 loc) · 1.89 KB
/
warc_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package webarchive
import (
"errors"
"fmt"
"io"
"log"
"os"
"testing"
"time"
)
// TestWARC opens the uncompressed hello-world example, reads its first
// record and checks that the record's date matches the expected timestamp.
func TestWARC(t *testing.T) {
	// checkExamples is defined elsewhere in the package; presumably it
	// verifies that the example fixtures are available — TODO confirm.
	checkExamples(t)
	f, err := os.Open("examples/hello-world.warc")
	if err != nil {
		// Fail here rather than handing a nil *os.File to the reader.
		t.Fatal("failure opening example: " + err.Error())
	}
	defer f.Close()
	rdr, err := NewWARCReader(f)
	if err != nil {
		t.Fatal("failure loading example: " + err.Error())
	}
	// Release the reader the same way TestGZ does.
	defer rdr.Close()
	rec, err := rdr.Next()
	if err != nil {
		t.Fatal(err)
	}
	const want = "2015-07-08T21:55:13Z"
	if got := rec.Date().Format(time.RFC3339); got != want {
		t.Errorf("expecting %s, got %v", want, rec.Date())
	}
}
// TestGZ opens the gzip-compressed example through NewWARCReader, walks every
// payload until io.EOF and checks that exactly 299 payloads were returned.
func TestGZ(t *testing.T) {
	// checkExamples is defined elsewhere in the package; presumably it
	// verifies that the example fixtures are available — TODO confirm.
	checkExamples(t)
	f, err := os.Open("examples/IAH-20080430204825-00000-blackbook.warc.gz")
	if err != nil {
		// Fail here rather than handing a nil *os.File to the reader.
		t.Fatal("failure opening example: " + err.Error())
	}
	defer f.Close()
	rdr, err := NewWARCReader(f)
	if err != nil {
		t.Fatal("failure loading example: " + err.Error())
	}
	defer rdr.Close()
	var count int
	for _, err = rdr.NextPayload(); err != io.EOF; _, err = rdr.NextPayload() {
		if err != nil {
			// t.Fatal (not log.Fatal) so the failure is attributed to this
			// test and the deferred closes still run; log.Fatal would exit
			// the whole test binary.
			t.Fatal(err)
		}
		count++
	}
	if count != 299 {
		t.Errorf("expecting 299 payloads, got %d", count)
	}
}
// ExampleNewWARCReader reads the first payload of the blackbook example,
// prints that record's ID and the first 55 bytes of its content, then prints
// how many further payloads the file contains.
func ExampleNewWARCReader() {
	f, err := os.Open("examples/IAH-20080430204825-00000-blackbook.warc")
	if errors.Is(err, os.ErrNotExist) {
		// The example file is not present: emit the expected output
		// directly so the example still passes without it.
		fmt.Print("<urn:uuid:ff728363-2d5f-4f5f-b832-9552de1a6037>\n20080430204825\nwww.archive.org. 589 IN A 207.241.229.39\n298")
		return
	}
	if err != nil {
		// Any other open failure would leave f nil; stop here.
		log.Fatal("failure opening example: " + err.Error())
	}
	defer f.Close()
	rdr, err := NewWARCReader(f)
	if err != nil {
		log.Fatal("failure creating a warc reader: " + err.Error())
	}
	defer rdr.Close()
	rec, err := rdr.NextPayload()
	if err != nil {
		log.Fatal("failure seeking: " + err.Error())
	}
	// Grab the first 55 bytes of the payload; a short read would otherwise
	// print a partly zero-filled buffer.
	buf := make([]byte, 55)
	if _, err := io.ReadFull(rec, buf); err != nil {
		log.Fatal("failure reading payload: " + err.Error())
	}
	wrec, ok := rec.(WARCRecord)
	if !ok {
		log.Fatal("failure doing WARCRecord interface assertion")
	}
	fmt.Println(wrec.ID())
	// Count the payloads that follow the one already read above.
	var count int
	for _, err = rdr.NextPayload(); err != io.EOF; _, err = rdr.NextPayload() {
		if err != nil {
			log.Fatal(err)
		}
		count++
	}
	fmt.Printf("%s\n%d", buf, count)
	// Output:
	// <urn:uuid:ff728363-2d5f-4f5f-b832-9552de1a6037>
	// 20080430204825
	// www.archive.org. 589 IN A 207.241.229.39
	// 298
}