-
Notifications
You must be signed in to change notification settings - Fork 0
/
Raw_data_cleaning.R
106 lines (98 loc) · 5.86 KB
/
Raw_data_cleaning.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Clean data by state and characteristics group
# library(RCurl)
# library(lubridate)
# library(purrr)
# library(magrittr)
# library(httr)
# library(psych)
# library(reshape2)
# library(data.table)
# library(tidyverse)
# library(tidyr)
# library(lattice)
# library(plotly)
# library(dplyr)
# library(devtools)
# library(fpc)
# library(bindrcpp)
# library(mgcv)
# library(scales)
# library(Hmisc)
# library(fst)
# library(cdlTools)
# library(totalcensus)
# library(dataRetrieval)
#' Join result records to their monitoring sites and normalise key columns
#'
#' Adds state abbreviation/name columns to the site table, keeps only sites
#' whose coordinates fall strictly inside the box longitude (-130, -65) and
#' latitude (25, 52) (presumably the contiguous United States - confirm),
#' left-joins the sites onto the result records, and then derives date parts,
#' coerces measurement columns to numeric, and normalises unit codes.
#'
#' @param site1 Site table (anything `as.data.frame()` accepts). The code reads
#'   the columns `StateCode`, `LongitudeMeasure`, `LatitudeMeasure` and
#'   `MonitoringLocationIdentifier`; `WellDepthMeasure.MeasureValue` and
#'   `WellHoleDepthMeasure.MeasureValue` must be present after the join.
#' @param dat1 Result table. The code reads `MonitoringLocationIdentifier`,
#'   `ActivityStartDate`, `ResultMeasureValue`,
#'   `DetectionQuantitationLimitMeasure.MeasureValue` and
#'   `ResultMeasure.MeasureUnitCode`.
#' @return `dat1` with the site columns joined on, plus `Year` (character,
#'   characters 1-4 of `ActivityStartDate`) and `Month` (numeric, characters
#'   6-7). Because this is a left join, every row of `dat1` is kept; rows whose
#'   site was filtered out (or is unknown) carry `NA` in the site columns,
#'   including the coordinates.
Clean_Data <- function(site1, dat1){
  # as.numeric() on a factor returns its integer level codes, so factors are
  # routed through their labels first; other types are converted directly.
  factor_safe_numeric <- function(x) {
    if (is.factor(x)) as.numeric(as.character(x)) else as.numeric(x)
  }

  finalsite <- as.data.frame(site1)
  finalsite$Stabbr <- cdlTools::fips(finalsite$StateCode, to = "Abbreviation")
  finalsite$StName <- cdlTools::fips(finalsite$Stabbr, to = "Name")

  # Coordinates must be numeric BEFORE the bounding-box test: comparing a
  # character column with a number is a lexical comparison, and comparing a
  # factor yields NA (which would silently drop every site).
  finalsite$LongitudeMeasure <- factor_safe_numeric(finalsite$LongitudeMeasure)
  finalsite$LatitudeMeasure <- factor_safe_numeric(finalsite$LatitudeMeasure)
  finalsite <- dplyr::filter(
    finalsite,
    LongitudeMeasure < -65 &
      LongitudeMeasure > -130 &
      LatitudeMeasure > 25 &
      LatitudeMeasure < 52
  )

  # Namespace-qualified calls (no pipe) so the function does not depend on
  # dplyr/magrittr having been attached by the caller.
  finaldata <- dplyr::left_join(dat1, finalsite,
                                by = "MonitoringLocationIdentifier")

  # Date parts are taken by character position from ActivityStartDate.
  finaldata$Year <- substr(finaldata$ActivityStartDate, start = 1, stop = 4)
  finaldata$Month <- as.numeric(substr(finaldata$ActivityStartDate, start = 6, stop = 7))
  finaldata$ResultMeasureValue <- as.numeric(as.character(finaldata$ResultMeasureValue))
  finaldata$DetectionQuantitationLimitMeasure.MeasureValue <- as.numeric(as.character(finaldata$DetectionQuantitationLimitMeasure.MeasureValue))

  # NOTE: flagging negative / NA results as "Not Detected" is currently
  # disabled; no detection-condition column is added here.
  # mutate(ResultDetectionConditionText2 = ifelse((is.na(ResultMeasureValue) | ResultMeasureValue < 0) , "Not Detected", "Detected"))

  # Unit cleaning: strip spaces, lower-case the litre suffix of mg/L, ug/L and
  # ng/L, and label empty unit strings as "No Unit" (NA stays NA).
  unit_code <- gsub(" ", "", finaldata$ResultMeasure.MeasureUnitCode, fixed = TRUE)
  for (mass_prefix in c("mg", "ug", "ng")) {
    unit_code <- gsub(paste0(mass_prefix, "/L"), paste0(mass_prefix, "/l"),
                      unit_code, fixed = TRUE)
  }
  finaldata$ResultMeasure.MeasureUnitCode <- sub("^$", "No Unit", unit_code)

  # lapply() always yields one vector per column (sapply()'s return shape
  # depends on the input), and the helper keeps factor values intact.
  cols.num <- c("LatitudeMeasure","LongitudeMeasure", "WellDepthMeasure.MeasureValue", "WellHoleDepthMeasure.MeasureValue")
  finaldata[cols.num] <- lapply(finaldata[cols.num], factor_safe_numeric)

  # Rows with NA coordinates are intentionally kept; callers that aggregate
  # over coordinates need na.rm = TRUE or their own NA filtering.
  finaldata
}
#
# Clean_Data2<- function(site1, dat1){
#
# finalsite <- as.data.frame(site1)
# finalsite$Stabbr <- fips(finalsite$StateCode, to='Abbreviation')
# finalsite$StName <- fips(finalsite$Stabbr, to='Name')
#
#
# finalsite <- finalsite %>% dplyr::filter(LongitudeMeasure < -65 &
# LongitudeMeasure > -130 &
# LatitudeMeasure > 25 &
# LatitudeMeasure < 52)
#
# finaldata <- dat1 %>% left_join(finalsite, by = c("MonitoringLocationIdentifier" = "MonitoringLocationIdentifier"))
#
#
# finaldata$Year <- substr(finaldata$ActivityStartDate, start = 1, stop = 4)
# finaldata$Month <- as.numeric(substr(finaldata$ActivityStartDate, start = 6, stop = 7))
# finaldata$ResultMeasureValue <- as.numeric(as.character(finaldata$ResultMeasureValue))
# finaldata$DetectionQuantitationLimitMeasure.MeasureValue <- as.numeric(as.character(finaldata$DetectionQuantitationLimitMeasure.MeasureValue))
# #Assigning the negative and NaN reported values as "Not Detected"
# finalSubset1 <- finaldata %>%
# # mutate(ResultDetectionConditionText2 = ifelse((is.na(ResultMeasureValue) | ResultMeasureValue < 0) , "Not Detected", "Detected"))
# # Unit conversion and cleaning
# finalSubset1$ResultMeasure.MeasureUnitCode <- gsub(" ", replacement = "", finalSubset1$ResultMeasure.MeasureUnitCode,fixed = TRUE)
# finalSubset1$ResultMeasure.MeasureUnitCode <- gsub(pattern="mg/L", replacement = "mg/l", finalSubset1$ResultMeasure.MeasureUnitCode,fixed = TRUE)
# finalSubset1$ResultMeasure.MeasureUnitCode <- gsub(pattern="ug/L", replacement = "ug/l", finalSubset1$ResultMeasure.MeasureUnitCode,fixed = TRUE)
# finalSubset1$ResultMeasure.MeasureUnitCode <- gsub(pattern="ng/L", replacement = "ng/l", finalSubset1$ResultMeasure.MeasureUnitCode,fixed = TRUE)
# finalSubset1$ResultMeasure.MeasureUnitCode <- gsub(pattern="ng/L", replacement = "ng/l", finalSubset1$ResultMeasure.MeasureUnitCode,fixed = TRUE)
# finalSubset1$ResultMeasure.MeasureUnitCode <- sub("^$", "No Unit", finalSubset1$ResultMeasure.MeasureUnitCode)
# #Result_detection_condition_text2 <- c("Detected", "Not Detected")
# cols.num <- c("LatitudeMeasure","LongitudeMeasure", "WellDepthMeasure.MeasureValue", "WellHoleDepthMeasure.MeasureValue")
# finalSubset1[cols.num] <- sapply(finalSubset1[cols.num], as.numeric)
# #Warning: Error in : Evaluation error: missing values and NaN's not allowed if 'na.rm' is FALSE.
# #109: <Anonymous>
# # finalSubset1=finalSubset1[!is.na(finalSubset1$LongitudeMeasure),]
# # finalSubset1=finalSubset1[!is.na(finalSubset1$LatitudeMeasure),]
# #finalSubset1[is.na(finalSubset1)] <- ""
# return(finalSubset1)
# }
#