-
Notifications
You must be signed in to change notification settings - Fork 0
/
newmain.py
executable file
·81 lines (74 loc) · 3.15 KB
/
newmain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
# Author: Sawood Alam <ibnesayeed@gmail.com>
#
# This script instantiates a Profile object (optionally populating it from a JSON profile), updates it using different profilers, and serializes it to JSON.
import os
import sys
import pprint
import json
import time
import requests
import ConfigParser
from profile import Profile
from cdx_extract_profiler import CDXExtractProfiler
def print_help():
    """Show command-line usage examples for profiling CDX files."""
    usage_lines = (
        "\nTo profile a CDX archive:",
        " Single CDX file : main.py abc.cdx",
        " Multiple CDX files : main.py abc.cdx def.cdx ...",
        " Multiple CDX files : main.py *.cdx abc/*.cdx ...\n",
    )
    # One print per entry, so the emitted text matches line for line.
    for usage_line in usage_lines:
        print(usage_line)
def write_json(jsonstr="{}", filepath="profile.json"):
    """Save a JSON profile on the local filesystem.

    Args:
        jsonstr: Serialized JSON text to write.
        filepath: Destination path; opened in "w" mode, so an existing
            file is overwritten.

    Raises:
        IOError/OSError: If the file cannot be opened or written.
    """
    print("Writing output to " + filepath)
    # The context manager closes the handle even when write() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(filepath, "w") as outfile:
        outfile.write(jsonstr)
def post_gist(jsonstr="{}", filename="profile.json"):
    """Post a JSON profile to GitHub as a public Gist.

    The endpoint, user and token are read from the "github" section of
    the module-level ``config`` object, which is created in the
    ``__main__`` block; this function must be called after that setup.

    Args:
        jsonstr: Serialized JSON text used as the Gist file content.
        filename: Name given to the single file inside the Gist.

    On HTTP 201 the URL of the new Gist is printed to stdout; any other
    status is reported on stderr instead of being silently ignored.
    """
    gist = {
        "description": "An archive profile created on "+time.strftime("%Y-%m-%d at %H:%M:%S")+".",
        "public": True,
        "files": {
            filename: {
                "content": jsonstr
            }
        }
    }
    resp = requests.post(config.get("github", "endpoint"),
                         data=json.dumps(gist),
                         auth=(config.get("github", "user"), config.get("github", "token")))
    if resp.status_code == 201:
        print("Writing to GitHub: " + resp.json()["html_url"])
    else:
        # Report the failure on stderr so it is visible but does not mix
        # with a JSON profile that may have been printed to stdout.
        sys.stderr.write("Failed to write to GitHub: HTTP " + str(resp.status_code) + "\n")
if __name__ == "__main__":
    # Without at least one CDX file argument there is nothing to profile.
    if len(sys.argv) < 2:
        print_help()
        sys.exit(0)

    # config.ini is looked up next to this script, not in the working directory.
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    config = ConfigParser.ConfigParser()
    config.read(os.path.join(scriptdir, "config.ini"))

    # Archive metadata comes from the "archive" section of the config file.
    p = Profile(name=config.get("archive", "name"),
                description=config.get("archive", "description"),
                homepage=config.get("archive", "homepage"),
                accesspoint=config.get("archive", "accesspoint"),
                memento_compliance=config.get("archive", "memento_compliance"),
                timegate=config.get("archive", "timegate"),
                timemap=config.get("archive", "timemap"),
                established=config.get("archive", "established"),
                # The trailing "Z" denotes UTC, so format gmtime(); strftime()
                # without a time tuple would use the local time zone.
                profile_updated=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                mechanism="https://oduwsdl.github.io/terms/mechanism#cdx")

    # NOTE(review): config.get() returns strings, so the two segment limits
    # are passed as str -- confirm CDXExtractProfiler converts them to int.
    cp = CDXExtractProfiler(max_host_segments=config.get("profile", "max_host_segments"),
                            max_path_segments=config.get("profile", "max_path_segments"),
                            global_stats=config.getboolean("profile", "generate_global_stats"))
    # Every command-line argument after the script name is passed as a CDX input.
    cp.process_cdx_extracts(sys.argv[1:])
    cp.calculate_stats()
    p.stats = cp.stats
    if config.getboolean("profile", "generate_key_stats"):
        p.count_keys()
    jsonstr = p.to_json()

    # Output name carries a local-time stamp: profile-YYYYmmdd-HHMMSS.json
    opf = "profile-"+time.strftime("%Y%m%d-%H%M%S")+".json"
    if config.getboolean("output", "write_to_file"):
        # Written under the "json" directory beside this script.
        write_json(jsonstr, filepath=os.path.join(scriptdir, "json", opf))
    else:
        print(jsonstr)
    if config.getboolean("output", "write_to_github"):
        post_gist(jsonstr, filename=opf)