-
Notifications
You must be signed in to change notification settings - Fork 39
/
Text.fs
231 lines (186 loc) · 7.93 KB
/
Text.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
(*
Copyright (c) Microsoft Corporation.
Description:
String manipulation helper functions.
Author:
William Blum (WiBlum) created 9/27/2012
Revision history:
Repackaged into FSharpLu on 2/18/2015
*)
module Microsoft.FSharpLu.Text
/// Comparer that orders strings by ordinal value while ignoring case.
/// Described by the original author as a file path comparer.
type CaseInsensitiveComparer() =
    interface System.Collections.Generic.IComparer<string> with
        member __.Compare(left, right) =
            let comparison = System.StringComparison.OrdinalIgnoreCase
            System.String.Compare(left, right, comparison)
/// Extension methods for String
type System.String with
    /// Extend the string replace function to allow for StringComparison options to be specified.
    /// Every non-overlapping occurrence of oldString, found by scanning left to right with
    /// comparisonType, is replaced by newString. Text that only becomes a match after an
    /// earlier replacement is NOT replaced again.
    /// Raises ArgumentNullException when oldString is null and ArgumentException when it is empty.
    /// A null newString is treated as the empty string.
    member this.Replace (oldString:string, newString:string, comparisonType:System.StringComparison): string =
        if isNull oldString then
            nullArg "oldString"
        if oldString.Length = 0 then
            // An empty pattern matches everywhere and would never let the scan advance.
            invalidArg "oldString" "The string to replace cannot be empty"
        let firstMatch = this.IndexOf(oldString, comparisonType)
        if firstMatch < 0 then
            // Nothing to replace: hand back the very same instance.
            this
        else
            let result = System.Text.StringBuilder(this.Length)
            let mutable copiedUpTo = 0
            let mutable matchIndex = firstMatch
            while matchIndex >= 0 do
                // Copy the untouched text preceding the match, then the replacement.
                result.Append(this, copiedUpTo, matchIndex - copiedUpTo).Append(newString) |> ignore
                // Resume right after the match; clamp so a linguistic match shorter than
                // oldString can never push the cursor past the end of the string.
                copiedUpTo <- min (matchIndex + oldString.Length) this.Length
                matchIndex <-
                    if copiedUpTo < this.Length then
                        this.IndexOf(oldString, copiedUpTo, comparisonType)
                    else
                        -1
            result.Append(this, copiedUpTo, this.Length - copiedUpTo).ToString()
/// Tests whether text begins with prefix.
/// Delegates to String.StartsWith(string) and therefore uses that overload's default comparison.
let startsWith (prefix:string) (text:System.String) =
    let beginsWithPrefix = text.StartsWith(prefix)
    beginsWithPrefix
/// Tests whether text ends with the given suffix (passed through the parameter named prefix).
/// Delegates to String.EndsWith(string) and therefore uses that overload's default comparison.
let endWith (prefix:string) (text:System.String) =
    let endsWithSuffix = text.EndsWith(prefix)
    endsWithSuffix
/// Remove count characters from the end of the specified string.
/// count must lie between 0 and text.Length inclusive; any other value makes
/// Substring raise System.ArgumentOutOfRangeException.
let chop count (text:System.String) =
    // Substring(0, text.Length) is valid on every runtime, so count = 0 returns the whole
    // string. String.Remove(text.Length), used previously, is rejected by .NET Framework.
    text.Substring(0, text.Length - count)
/// Strip every leading and trailing character of text that belongs to chars
let trim (chars:char[]) (text:System.String) =
    text.Trim(chars)
/// Strip every trailing character of text that belongs to chars
let trimEnd (chars:char[]) (text:System.String) =
    text.TrimEnd(chars)
/// Strip every leading character of text that belongs to chars
let trimStart (chars:char[]) (text:System.String) =
    text.TrimStart(chars)
/// Drop the first count characters of text and return the remainder.
/// Note the argument order: the text comes first, then the count.
let skip (text:string) count =
    let remaining = text.Length - count
    text.Substring(count, remaining)
/// Remove a prefix from the specified string.
/// The prefix is matched with an ordinal (character by character, case-sensitive)
/// comparison; when text does not start with prefix it is returned unchanged.
let skipPrefix (prefix:string) (text:string) =
    // Ordinal matching guarantees the matched span is exactly prefix.Length characters.
    // A culture-sensitive match may succeed on a span of a different length, which would
    // make the cut below wrong or out of range.
    if text.StartsWith(prefix, System.StringComparison.Ordinal) then
        text.Substring(prefix.Length)
    else
        text
/// Remove a suffix from the specified string.
/// The suffix is matched with an ordinal (character by character, case-sensitive)
/// comparison; when text does not end with suffix it is returned unchanged.
let removeSuffix (suffix:string) (text:string) =
    // Ordinal matching guarantees the matched span is exactly suffix.Length characters,
    // so the length arithmetic below is always in range.
    if text.EndsWith(suffix, System.StringComparison.Ordinal) then
        // Substring (rather than Remove) also accepts an empty suffix on every runtime.
        text.Substring(0, text.Length - suffix.Length)
    else
        text
/// Drop prefix from the start of text when present, comparing with
/// StringComparison.OrdinalIgnoreCase; otherwise return text unchanged
let skipPrefixCaseInsensitive (prefix:string) (text:string) =
    match text.StartsWith(prefix, System.StringComparison.OrdinalIgnoreCase) with
    | true -> text.Substring(prefix.Length, text.Length - prefix.Length)
    | false -> text
/// Remove a suffix case insensitively (used on file paths).
/// The suffix is matched with StringComparison.OrdinalIgnoreCase; when text does not
/// end with suffix it is returned unchanged.
let removeSuffixCaseInsensitive (suffix:string) (text:string) =
    if text.EndsWith(suffix, System.StringComparison.OrdinalIgnoreCase) then
        // Substring accepts a result length equal to text.Length, so an empty suffix
        // returns the whole string; String.Remove(text.Length) throws on .NET Framework.
        text.Substring(0, text.Length - suffix.Length)
    else
        text
/// Cut text at the first occurrence of marker: the marker and everything after it are removed.
/// The marker is located with StringComparison.OrdinalIgnoreCase.
/// Text that does not contain the marker is returned unchanged.
let removeAfter (marker:string) (text:string) =
    match text.IndexOf(marker, System.StringComparison.OrdinalIgnoreCase) with
    | position when position >= 0 -> text.Remove(position)
    | _ -> text
/// Return the last n characters of text.
/// n must not exceed the length of the string.
let right n (text:string) =
    let start = text.Length - n
    text.Substring(start, n)
/// Break text into pieces at every occurrence of any of the given separator characters.
/// Empty pieces are kept.
let split charSeparators (text:string) =
    text.Split(charSeparators)
/// Break text into pieces at every occurrence of any of the given separator strings.
/// Empty pieces are discarded.
let splitOnString (stringSeparators:string[]) (text:string) =
    let options = System.StringSplitOptions.RemoveEmptyEntries
    text.Split(stringSeparators, options)
/// Break text into pieces at every occurrence of any of the given separator characters.
/// Empty pieces are discarded.
let splitNoEmptyEntries (charSeparators:char[]) (text:string) =
    let options = System.StringSplitOptions.RemoveEmptyEntries
    text.Split(charSeparators, options)
/// Remove one leading and one trailing double-quote character, each only when present.
/// The leading quote is handled first, then the trailing one.
let stripQuotes =
    let quote = "\""
    skipPrefix quote >> removeSuffix quote
/// Split text in two at the first occurrence of charSep.
/// Returns the part before the separator and the part after it; the separator itself is dropped.
/// Raises ArgumentException when the separator does not occur in text.
let splitOnce (charSep:char) (text:string) =
    match text.IndexOf(charSep) with
    | separatorIndex when separatorIndex >= 0 ->
        let before = text.Substring(0, separatorIndex)
        let after = text.Substring(separatorIndex + 1)
        before, after
    | _ ->
        invalidArg "text" "Separator not present in the string"
/// Concatenate the given strings, inserting separator between consecutive elements
let join separator (values:seq<string>) =
    System.String.Join(separator, values)
/// Keep at most max leading characters of text; shorter strings are returned unchanged
let truncate max (text:string) =
    if text.Length > max then
        text.Substring(0, max)
    else
        text
/// Length of the longest common prefix of two strings:
/// the number of leading positions at which s1 and s2 hold the same character
let longestCommonPrefixLength (s1:string) (s2:string) =
    Seq.zip s1 s2
    |> Seq.takeWhile (fun (left, right) -> left = right)
    |> Seq.length
/// Prefix every line of a text with count spaces.
/// Lines are separated on System.Environment.NewLine and joined back with it.
/// NOTE(review): the split goes through splitOnString, which discards empty entries,
/// so blank lines of the input do not appear in the result - confirm this is intended.
let indent count =
    let padding = new System.String(' ', count)
    let indentLine (line:string) = padding + line
    splitOnString [|System.Environment.NewLine|]
    >> Seq.map indentLine
    >> join System.Environment.NewLine
/// Encode a string to Base64: the text is first converted to its UTF-8 bytes,
/// which are then Base64-encoded
let encodeToBase64 (toEncode:string) =
    let utf8Bytes = System.Text.Encoding.UTF8.GetBytes(toEncode)
    System.Convert.ToBase64String(utf8Bytes)
/// Decode Base64 content.
/// The input holds the Base64 text as UTF-8 bytes; the result is the decoded byte array.
let decodeFromBase64 (base64Encoded:byte[]) =
    base64Encoded
    |> System.Text.Encoding.UTF8.GetString
    |> System.Convert.FromBase64String
/////// Implementation of Knuth–Morris–Pratt on Stream
/// Build the backtrack table consumed by kmpFindBytesInStream.
/// The table has searchBytes.Length + 1 entries. Entry 0 is -1; entry j tells the search
/// which pattern index to resume from after a mismatch at pattern index j; the last
/// entry is the resume index after a complete match.
/// searchBytes must not be empty: for an empty pattern the final write falls outside the table.
let computeKmpBacktrack (searchBytes: uint8[]) =
    let patternLength = searchBytes.Length
    let table : int[] = Array.zeroCreate (patternLength + 1)
    table.[0] <- -1
    // candidate: pattern index currently being compared against position
    let mutable candidate = 0
    let mutable position = 1
    while position < patternLength do
        if searchBytes.[position] = searchBytes.[candidate] then
            // Same byte: a mismatch here would fail at candidate too, so reuse its entry.
            table.[position] <- table.[candidate]
        else
            table.[position] <- candidate
            // Follow the table until a candidate matches the current byte or none is left (-1).
            candidate <- table.[candidate]
            while candidate >= 0 && searchBytes.[position] <> searchBytes.[candidate] do
                candidate <- table.[candidate]
        candidate <- candidate + 1
        position <- position + 1
    table.[position] <- candidate
    table
/// Options for kmpFindBytesInStream: selects whether the search stops at the
/// first occurrence or continues until the end of the stream
type FindOptions =
    /// Return after finding the first occurrence of bytes in the stream
    | FindFirst
    /// Return after finding all occurrences of bytes in the stream
    | FindAll
/// Use the Knuth–Morris–Pratt algorithm to search a stream for a byte sequence.
/// The stream is consumed one byte at a time with ReadByte, starting at its current position.
/// Returns the list of positions at which an occurrence starts, in the order found; the
/// list is empty when the bytes are not found. With FindFirst the search stops after the
/// first occurrence; with FindAll it runs to the end of the stream.
/// Each reported position is derived from stream.Position at the moment a match completes.
let kmpFindBytesInStream (findOptions:FindOptions) (stream:System.IO.Stream) (searchBytes:uint8[]) =
    let backtrack = computeKmpBacktrack searchBytes
    let patternLength = searchBytes.Length
    let findAll = (findOptions = FindAll)
    // matched: number of pattern bytes matched so far (index of the next pattern byte)
    let mutable matched = 0
    let mutable current = stream.ReadByte()
    // Occurrences are accumulated most recent first and reversed on the way out.
    let mutable found = []
    while current <> -1 && (findAll || List.isEmpty found) do
        if searchBytes.[matched] = uint8 current then
            matched <- matched + 1
            if matched = patternLength then
                // The stream sits just past the match; step back over it to get its start.
                found <- (stream.Position - int64 matched) :: found
                matched <- backtrack.[matched]
            current <- stream.ReadByte()
        else
            matched <- backtrack.[matched]
            if matched < 0 then
                // No shorter partial match can continue: restart the pattern on the next byte.
                matched <- matched + 1
                current <- stream.ReadByte()
    List.rev found
/// Search a stream for a byte sequence with the Knuth–Morris–Pratt algorithm, stopping at the first hit.
/// Returns Some position of the first occurrence, or None if the bytes could not be found.
let kmpTryFindFirstBytesInStream (stream:System.IO.Stream) (searchBytes:uint8[]) =
    kmpFindBytesInStream FindFirst stream searchBytes
    |> List.tryHead
/// Tell whether a file contains the given byte sequence.
/// The file is opened for reading (sharing read access) and scanned as a stream,
/// so it is never loaded into memory as a whole.
let fileContainsBytes (filePath:string) (searchBytes:uint8[]) =
    let mode = System.IO.FileMode.Open
    let access = System.IO.FileAccess.Read
    let share = System.IO.FileShare.Read
    use fileStream = System.IO.File.Open(filePath, mode, access, share)
    kmpTryFindFirstBytesInStream fileStream searchBytes
    |> Option.isSome