This repository has been archived by the owner on Feb 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Scrape_HomeCare.R
96 lines (68 loc) · 3.33 KB
/
Scrape_HomeCare.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Install the scraping dependencies only when they are missing, so that
# re-running the script does not re-download packages (and does not fail in a
# non-interactive session that has no CRAN mirror configured).
if (!requireNamespace("RSelenium", quietly = TRUE)) {
  install.packages("RSelenium", dependencies = TRUE)
}
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
library("RSelenium")
library("rvest")
#### CONNECTING VIA SAUCE LABS SERVER
# saucelabs.com
# Credentials are read from environment variables (e.g. set in ~/.Renviron)
# so that the access key is never stored in this file:
#   SAUCE_USERNAME   - Sauce Labs username (defaults to the original account)
#   SAUCE_ACCESS_KEY - Sauce Labs access key (required)
# NOTE(review): an access key was previously committed in this script; treat
# it as compromised and rotate it in the Sauce Labs account settings.
user <- Sys.getenv("SAUCE_USERNAME", unset = "chancvoong")
key <- Sys.getenv("SAUCE_ACCESS_KEY", unset = "")
if (!nzchar(user) || !nzchar(key)) {
  stop(
    "Set SAUCE_USERNAME and SAUCE_ACCESS_KEY before running this script.",
    call. = FALSE
  )
}
port <- 80
# The remote server address embeds the credentials: user:key@host
ip <- paste0(user, ":", key, "@ondemand.saucelabs.com")
# The browser, version and platform here were chosen arbitrarily. Choose
# another if you want: https://saucelabs.com/platforms
rdBrowser <- "chrome"
version <- "33"
platform <- "Windows 10"
extraCapabilities <- list(
  name = "RSelenium",
  username = user,
  accessKey = key,
  tags = list("RSelenium-vignette", "OS/Browsers-vignette")
)
remDr <- remoteDriver$new(
  remoteServerAddr = ip,
  port = port,
  browserName = rdBrowser,
  version = version,
  platform = platform,
  extraCapabilities = extraCapabilities
)
# Launch the browser (every time it times out, open the remDr again)
remDr$open()
# Load the provider search page that the rest of the script fills in
remDr$navigate("https://www.compass.state.pa.us/Compass.web/ProviderSearch/Home#/BasicSearch")
# Fill in the basic search form: address, children's age groups, then submit.

# Find the address box and enter your search criteria
Address <- remDr$findElement("css selector", "#address-basic")
Addr <- "Philadelphia"
Address$sendKeysToElement(list(Addr, ""))
Address$sendKeysToElement(list(key = "tab"))

# Find the Children's Ages box and open it (at least one box must be checked).
# Notice that you can use xpath or css to find an element -- mind the quotes.
Unit <- remDr$findElement("css selector", "#address-carelevel > a")
Unit$sendKeysToElement(list(key = "enter"))

# Suffixes of the age-group checkbox ids ("address-carelevel<suffix>"),
# covering children's ages 0-5.
# Checking several boxes means "and" instead of "or". The published data was
# produced by running the script once per box (edit this vector to a single
# suffix), writing a csv for each, combining them and removing duplicates.
care_levels <- c("UTO", "TOT", "ONE", "TWO", "THREE", "FOU", "FIV")
for (level in care_levels) {
  checkbox <- remDr$findElement(
    "xpath",
    sprintf("//*[@id='address-carelevel%s']", level)
  )
  checkbox$clickElement()
}

# Press enter in the address box to submit -- will take a few minutes to load
Address$sendKeysToElement(list(key = "enter"))
#--------------------------------------------------------------------------------------
# This code takes the entire results list and exports it to a csv file for
# further cleaning in Excel.
# Wait until the results have finished loading before running this section.

# Find the results container (the css selector for the results list)
HomeCare <- remDr$findElement("css selector", "#results-list")

# Pull all of the text inside the results list. The return value is used
# directly (rather than reading the driver's `value` field afterwards) and is
# flattened to a plain character vector so the csv gets a stable column name
# instead of one derived from the scraped text.
allScrapedData <- unlist(HomeCare$getElementText(), use.names = FALSE)
if (length(allScrapedData) == 0 || !any(nzchar(allScrapedData))) {
  warning(
    "The results list is empty; the page may not have finished loading.",
    call. = FALSE
  )
}

# Convert to dataframe
df1 <- data.frame(allScrapedData = allScrapedData, stringsAsFactors = FALSE)

# Save as csv (column A holds the row names, column B the scraped text)
write.csv(df1, file = "locations.csv")

# Use Excel to remove carriage returns and replace line feeds with commas,
# then use text to columns (comma) to divide the values:
# =TRIM(SUBSTITUTE(SUBSTITUTE(B2,CHAR(13),""),CHAR(10),", "))
#
# When finished, call remDr$close() to release the remote browser session.
#--------------------------------------------------------------------------------------