Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sourcery refactored master branch #19

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 55 additions & 40 deletions svn_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,36 +26,35 @@ def readsvn(data, urli, match, proxy_dict):
global author_list
global excludes
if not urli.endswith('/'):
urli = urli + "/"
urli = f"{urli}/"
for a in data.text.splitlines():
# below functionality will find all usernames from svn entries file
if a == "has-props":
author_list.append(old_line)
if a == "file":
if not pattern.search(old_line):
continue
ignore = getext(old_line) in excludes
if ignore:
print('{}{}(not extracted)'.format(urli, old_line))
else:
print('{}{}'.format(urli, old_line))
if no_extract and not ignore:
save_url_svn(urli, old_line, proxy_dict)
file_list = file_list + ";" + old_line
if a == "dir":
if old_line != "":
folder_path = os.path.join("output", urli.replace("http://", "").replace("https://", "").replace("/", os.path.sep), old_line)
if not os.path.exists(folder_path):
if no_extract:
os.makedirs(folder_path)
dir_list = dir_list + ";" + old_line
print('{}{}'.format(urli, old_line))
if not os.path.exists(folder_path) and no_extract:
os.makedirs(folder_path)
dir_list = f"{dir_list};{old_line}"
print(f'{urli}{old_line}')
try:
d = requests.get(urli + old_line + "/.svn/entries", verify=False, proxies=(proxy_dict))
readsvn(d, urli + old_line, match, proxy_dict)
except Exception:
print("Error Reading {}{}/.svn/entries so killing".format(urli, old_line))
print(f"Error Reading {urli}{old_line}/.svn/entries so killing")

elif a == "file":
if not pattern.search(old_line):
continue
ignore = getext(old_line) in excludes
if ignore:
print(f'{urli}{old_line}(not extracted)')
else:
print(f'{urli}{old_line}')
if no_extract and not ignore:
save_url_svn(urli, old_line, proxy_dict)
file_list = f"{file_list};{old_line}"
elif a == "has-props":
author_list.append(old_line)
Comment on lines -29 to +57
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function readsvn refactored with the following changes:

old_line = a
return file_list, dir_list, user

Expand All @@ -67,9 +66,9 @@ def readwc(data, urli, match, proxy_dict):
global excludes
if not folder.endswith(os.path.sep):
folder = folder + os.path.sep
with open(folder + "wc.db", "wb") as f:
with open(f"{folder}wc.db", "wb") as f:
f.write(data.content)
conn = sqlite3.connect(folder + "wc.db")
conn = sqlite3.connect(f"{folder}wc.db")
Comment on lines -70 to +71
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function readwc refactored with the following changes:

c = conn.cursor()
try:
c.execute('select local_relpath, ".svn/pristine/" || substr(checksum,7,2) || "/" || '
Expand All @@ -84,9 +83,9 @@ def readwc(data, urli, match, proxy_dict):
continue
ignore = getext(filename) in excludes
if ignore:
print('{}{}(not extracted)'.format(urli, filename))
print(f'{urli}{filename}(not extracted)')
else:
print("{}{}".format(urli, filename))
print(f"{urli}{filename}")
if no_extract and not ignore:
save_url_wc(urli, filename, url_path, proxy_dict)
except Exception:
Expand All @@ -101,7 +100,7 @@ def show_list(_list, statement):
print(statement)
cnt = 1
for x in set(_list):
print("{} : {}".format(cnt, x))
print(f"{cnt} : {x}")
Comment on lines -104 to +103
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function show_list refactored with the following changes:

cnt = cnt + 1


Expand All @@ -123,7 +122,7 @@ def save_url_wc(url, filename, svn_path, proxy_dict):
with open(folder + os.path.basename(filename), "wb") as f:
f.write(r.content)
except Exception:
print("Error while accessing : {}{}".format(url, svn_path))
print(f"Error while accessing : {url}{svn_path}")
Comment on lines -126 to +125
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function save_url_wc refactored with the following changes:

if show_debug:
traceback.print_exc()

Expand All @@ -136,7 +135,12 @@ def save_url_svn(url, filename, proxy_dict):
if not folder.endswith(os.path.sep):
folder = folder + os.path.sep
try:
r = requests.get(url + "/.svn/text-base/" + filename + ".svn-base", verify=False, proxies=(proxy_dict))
r = requests.get(
f"{url}/.svn/text-base/{filename}.svn-base",
verify=False,
proxies=(proxy_dict),
)

Comment on lines -139 to +143
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function save_url_svn refactored with the following changes:

if not os.path.isdir(folder+filename):
with open(folder + filename, "wb") as f:
f.write(r.content)
Expand Down Expand Up @@ -187,8 +191,8 @@ def main(argv):
print("Proxy not defined")
proxy_dict = ""
if match:
print("Only downloading matches to {}".format(match))
match = "("+match+"|entries$|wc.db$)" # need to allow entries$ and wc.db too
print(f"Only downloading matches to {match}")
match = f"({match}|entries$|wc.db$)"
Comment on lines -190 to +195
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function main refactored with the following changes:

This removes the following comments ( why? ):

# need to allow entries$ and wc.db too

else:
match = ""
if x.wcdb and x.entries:
Expand All @@ -199,7 +203,7 @@ def main(argv):
exit()
print(url)
if not url.endswith('/'):
url = url + "/"
url = f"{url}/"
print("Checking if URL is correct")
try:
r = requests.get(url, verify=False, proxies=(proxy_dict))
Expand All @@ -217,24 +221,35 @@ def main(argv):
os.makedirs(folder_path)
if not x.entries:
print("Checking for presence of wc.db")
r = requests.get(url + "/.svn/wc.db", verify=False, allow_redirects=False, proxies=(proxy_dict))
r = requests.get(
f"{url}/.svn/wc.db",
verify=False,
allow_redirects=False,
proxies=(proxy_dict),
)

if r.status_code == 200:
print("WC.db found")
rwc = readwc(r, url, match, proxy_dict)
if rwc == 0:
if x.userlist:
show_list(author_list, "List of usernames used to commit in svn are listed below")
exit()
if rwc == 0 and x.userlist:
show_list(author_list, "List of usernames used to commit in svn are listed below")
exit()
else:
if show_debug:
print("Status code returned : {}".format(r.status_code))
print(f"Status code returned : {r.status_code}")
print("Full Response")
print(r.text)
print("WC.db Lookup FAILED")
if not x.wcdb:
print("lets see if we can find .svn/entries")
# disabling redirection to make sure no redirection based 200ok is captured.
r = requests.get(url + "/.svn/entries", verify=False, allow_redirects=False, proxies=(proxy_dict))
r = requests.get(
f"{url}/.svn/entries",
verify=False,
allow_redirects=False,
proxies=(proxy_dict),
)

if r.status_code == 200:
print("SVN Entries Found if no file listed check wc.db too")
readsvn(r, url, match, proxy_dict)
Expand All @@ -244,13 +259,13 @@ def main(argv):
exit()
else:
if show_debug:
print("Status code returned : {}".format(r.status_code))
print(f"Status code returned : {r.status_code}")
print("Full response")
print(r.text)
print(".svn/entries lookup FAILED")
print("{} doesn't contains any SVN repository in it".format(url))
print(f"{url} doesn't contains any SVN repository in it")
else:
print("URL returns {}".format(r.status_code))
print(f"URL returns {r.status_code}")
exit()


Expand Down