@@ -14,8 +14,8 @@ Public Class ConvertLineEndings
1414 Ux2Mac
1515 End Enum
1616
17- Const CR = ChrW( 13 )
18- Const LF = ChrW( 10 )
17+ Const CR As Char = ChrW( 13 )
18+ Const LF As Char = ChrW( 10 )
1919
2020 ''' <summary>
2121 ''' Converts a DOS text file to have Unix line endings.
@@ -45,28 +45,35 @@ Public Class ConvertLineEndings
4545 ''' <param name="convertMode">This is the type of conversion we are going to perform</param>
4646 ''' <returns>Exit code.</returns>
4747 Private Shared Async Function ReplaceLineEndings(originalFile As String , newFile As String , convertMode As TextConvertMode) As Task( Of Integer )
48+ ' Attempt to detect encoding
49+ Dim fileEncoding As Encoding = GetEncoding(originalFile)
50+ If fileEncoding Is Nothing Then Return 4
51+ Debug.Print(fileEncoding.ToString())
52+
4853 Dim convertedText As New StringBuilder
4954 Dim oldFileStream As FileStream = Nothing
5055 Try
5156 oldFileStream = New FileStream(originalFile, FileMode.Open)
52- Using oldFile As New StreamReader(oldFileStream)
57+ Using oldFile As New StreamReader(oldFileStream, fileEncoding, True )
5358 Do Until oldFile.EndOfStream
54- Dim readBuffer( 2 ) As Char
59+ Dim readBuffer( 0 ) As Char
5560 Dim readChars As Integer = Await oldFile.ReadAsync(readBuffer, 0 , 1 )
5661 If readChars < 1 Then Exit Do
5762 Select Case convertMode
5863 Case TextConvertMode.Dos2Ux
5964 If readBuffer( 0 ) = CR AndAlso oldFile.Peek() = 10 Then
6065 ' Strip out CR chars if followed by LF
61- readBuffer( 0 ) = Nothing
66+ Await oldFile.ReadAsync(readBuffer, 0 , 1 )
6267 End If
6368 Case TextConvertMode.Ux2Dos
6469 If readBuffer( 0 ) = CR AndAlso oldFile.Peek() = 10 Then
70+ ReDim Preserve readBuffer( 1 )
6571 ' This is a DOS line ending, keep it.
6672 Dim tempBuffer( 1 ) As Char
6773 Await oldFile.ReadAsync(tempBuffer, 0 , 1 )
6874 readBuffer( 1 ) = tempBuffer( 0 )
69- ElseIf readBuffer( 0 ) = ChrW( 10 ) Then
75+ ElseIf readBuffer( 0 ) = LF Then
76+ ReDim readBuffer( 1 )
7178 ' Add preceding CR
7178 readBuffer( 0 ) = CR
7279 readBuffer( 1 ) = LF
@@ -80,20 +87,45 @@ Public Class ConvertLineEndings
8087 End Using
8188 oldFileStream = Nothing
8289 Catch ex As Exception
83- Debug.Print( "Error: " & ex.Message & vbCrLf & "Number: " & ex.HResult)
90+ Debug.Print( "Error: " & ex.Message & Environment.NewLine & "Number: " & ex.HResult.ToString )
8491 Return ex.HResult
8592 Finally
8693 If oldFileStream IsNot Nothing Then oldFileStream.Dispose()
8794 End Try
8895
8996 'Write the result out to a new file
9097 Try
91- Await File.WriteAllTextAsync (newFile, convertedText.ToString())
98+ File.WriteAllText (newFile, convertedText.ToString(), New UTF8Encoding( False ))
9299 Catch ex As Exception
93- Debug.Print( "Error: " & ex.Message & vbCrLf & "Number: " & ex.HResult)
100+ Debug.Print( "Error: " & ex.Message & Environment.NewLine & "Number: " & ex.HResult.ToString )
94101 Return ex.HResult
95102 End Try
96103
97104 Return 0 ' Exit status 0 is a good thing
98105 End Function
106+
''' <summary>
''' Attempt to detect the encoding of a file from its byte order mark (BOM).
''' </summary>
''' <param name="filename">The file to get the encoding pattern from.</param>
''' <returns>
''' The encoding indicated by the BOM, <see cref="Encoding.ASCII"/> when no
''' known BOM is present, or Nothing when the file cannot be opened or read.
''' </returns>
Public Shared Function GetEncoding(ByVal filename As String) As Encoding
    Dim bom(3) As Byte
    Dim bytesRead As Integer = 0

    Try
        Using stream As New FileStream(filename, FileMode.Open, FileAccess.Read)
            ' Stream.Read may return fewer bytes than requested, so keep
            ' reading until the buffer is full or the end of the file is hit.
            Do While bytesRead < bom.Length
                Dim chunk As Integer = stream.Read(bom, bytesRead, bom.Length - bytesRead)
                If chunk = 0 Then Exit Do
                bytesRead += chunk
            Loop
        End Using
    Catch ex As Exception
        Debug.Print("Error: " & ex.Message & Environment.NewLine & "Number: " & ex.HResult.ToString)
        Return Nothing
    End Try

    ' Only bytes that were actually read take part in the comparisons; the
    ' zero padding of a short file must not be mistaken for BOM content.
    If bytesRead >= 4 Then
        ' The UTF-32 LE mark (FF FE 00 00) begins with the UTF-16 LE mark,
        ' so it has to be tested before the two-byte checks below.
        If bom(0) = &HFF AndAlso bom(1) = &HFE AndAlso bom(2) = 0 AndAlso bom(3) = 0 Then Return Encoding.UTF32
        ' 00 00 FE FF is the big-endian UTF-32 mark; Encoding.UTF32 is little-endian.
        If bom(0) = 0 AndAlso bom(1) = 0 AndAlso bom(2) = &HFE AndAlso bom(3) = &HFF Then Return New UTF32Encoding(True, True)
        ' A UTF-7 mark is "+/v" followed by one of "8", "9", "+" or "/".
        ' Requiring the fourth byte stops plain text that merely starts
        ' with "+/v" from being classified as UTF-7.
        If bom(0) = &H2B AndAlso bom(1) = &H2F AndAlso bom(2) = &H76 AndAlso
           (bom(3) = &H38 OrElse bom(3) = &H39 OrElse bom(3) = &H2B OrElse bom(3) = &H2F) Then Return Encoding.UTF7
    End If

    If bytesRead >= 3 AndAlso bom(0) = &HEF AndAlso bom(1) = &HBB AndAlso bom(2) = &HBF Then Return Encoding.UTF8

    If bytesRead >= 2 Then
        If bom(0) = &HFF AndAlso bom(1) = &HFE Then Return Encoding.Unicode
        If bom(0) = &HFE AndAlso bom(1) = &HFF Then Return Encoding.BigEndianUnicode
    End If

    Return Encoding.ASCII
End Function
99131End Class
0 commit comments