いろんな事をツラツラ~と書いています。

Excel VBAで文字列の差分

Excel VBAで文字列の違いを比較したかったので調べてみました。

参考にしたのは「当面C#と.NETな記録」の「.NET diff class」です。
文字列の差分に関わる部分のみVBAに書き換えてみました。

実際に大量のセルに対して連続的に動かしてみると
不安定だったので、何が起こっても責任は取れません。
ご了承ください。

特に断り無く業務に利用していただいてかまいませんが、サポートは出来ませんのでご了承ください。

サンプルプログラムのダウンロードページ



クラス モジュール: DiffResult
Option Explicit

Public Modified As Boolean
Public OriginalStart As Long
Public OriginalLength As Long
Public ModifiedStart As Long
Public ModifiedLength As Long

Public Sub Construct(ByVal modified1 As Boolean, ByVal orgStart1 As Long, ByVal orgLength1 As Long, ByVal modStart1 As Long, ByVal modLength1 As Long)
    Modified = modified1
    OriginalStart = orgStart1
    OriginalLength = orgLength1
    ModifiedStart = modStart1
    ModifiedLength = modLength1
End Sub


クラス モジュール: FastDiff
Option Explicit

Private dataA() As String
Private dataB() As String
Public isSwap As Boolean
Private fp() As Snake

Public Sub SplitChar(ByVal textA As String, ByVal textB As String)
    dataA = SplitChar1(textA)
    dataB = SplitChar1(textB)
End Sub

Public Function SplitChar1(ByVal text As String) As String()
    Dim result() As String
    ReDim result(0 To Len(text) - 1) As String
    
    Dim i As Long
    For i = 1 To Len(text)
        result(i - 1) = Mid(text, i, 1)
    Next
    
    SplitChar1 = result
End Function

Public Function DetectDiff() As DiffResult()
    ReDim fp(0 To UBound(dataA) + UBound(dataB) + 4) As Snake
    Dim i As Long
    For i = LBound(fp) To UBound(fp)
        Set fp(i) = New Snake
        
        Set fp(i).cs = Nothing
    Next
    
    Dim d As Long
    d = UBound(dataB) - UBound(dataA)
    
    Dim p As Long
    p = 0
    
    Dim k As Long
    Do
        For k = -p To d - 1
            Call SearchSnake(k)
        Next
        
        For k = d + p To d Step -1
            Call SearchSnake(k)
        Next
        
        p = p + 1
    Loop While fp(UBound(dataB) + 2).posB <> (UBound(dataB) + 2)
    
    Dim endCS As CommonSubsequence
    Set endCS = New CommonSubsequence
    Call endCS.Construct(UBound(dataA) + 1, UBound(dataB) + 1, 0, fp(UBound(dataB) + 2).cs)
    
    Dim result As CommonSubsequence
    Set result = Reverse(endCS)
    
    If (isSwap) Then
        DetectDiff = PresentDiffSwap(result, True)
    Else
        DetectDiff = PresentDiff(result, True)
    End If
End Function

Private Sub SearchSnake(ByVal k As Long)
    Dim kk As Long
    kk = UBound(dataA) + 2 + k
    
    Dim previousCS As CommonSubsequence
    
    Dim posA As Long
    Dim posB As Long
    posA = 0
    posB = 0
    
    Dim lk As Long
    Dim rk As Long
    lk = kk - 1
    rk = kk + 1
    
    Dim lb As Long
    Dim rb As Long
    
    lb = fp(lk).posB
    rb = fp(rk).posB - 1
    
    If (lb > rb) Then
        posB = lb
        Set previousCS = fp(lk).cs
    Else
        posB = rb
        Set previousCS = fp(rk).cs
    End If
    posA = posB - k
    
    Dim startA As Long
    Dim startB As Long
    startA = posA
    startB = posB
    
    Do While ((posA < UBound(dataA) + 1) And (posB < UBound(dataB) + 1))
        If (dataA(posA) <> dataB(posB)) Then
            Exit Do
        End If
        
        posA = posA + 1
        posB = posB + 1
    Loop
    
    Dim cs As CommonSubsequence
    If (startA <> posA) Then
        Set cs = New CommonSubsequence
        Call cs.Construct(startA, startB, posA - startA, previousCS)
        Set fp(kk).cs = cs
    Else
        Set fp(kk).cs = previousCS
    End If
    
    fp(kk).posB = posB + 1
End Sub


クラス モジュール: Snake
Option Explicit

Public posB As Long
Public cs As CommonSubsequence


クラス モジュール: CommonSubsequence
Option Explicit

Public startA As Long
Public startB As Long
Public Length As Long
Public Nxt As CommonSubsequence

Public Sub Construct(ByVal startA1 As Long, ByVal startB1 As Long, ByVal length1 As Long, ByRef nxt1 As CommonSubsequence)
    startA = startA1
    startB = startB1
    Length = length1
    Set Nxt = nxt1
End Sub


標準モジュール: ModuleDiff
Option Explicit

Public Function DiffChar(ByVal textA As String, ByVal textB As String) As DiffResult()
    If (textA = "" Or textB = "") Then
        DiffChar = StringNullOrEmpty(textA, textB)
        
        Exit Function
    End If
    
    Dim diff As FastDiff
    Set diff = New FastDiff
    
    If (Len(textA) <= Len(textB)) Then
        Call diff.SplitChar(textA, textB)
    Else
        diff.isSwap = True
        Call diff.SplitChar(textB, textA)
    End If
    
    Dim result() As DiffResult
    result = diff.DetectDiff
    
    DiffChar = result
End Function

Public Function StringNullOrEmpty(ByVal textA As String, ByVal textB As String) As DiffResult()
    Dim lengthA As Long
    Dim lengthB As Long
    
    lengthA = Len(textA)
    lengthB = Len(textB)
    
    Dim cs As CommonSubsequence
    Set cs = New CommonSubsequence
    Call cs.Construct(lengthA, lengthB, 0, Nothing)
    
    StringNullOrEmpty = PresentDiff(cs, True)
End Function

Private Sub DiffResultListAdd(ByRef list() As DiffResult, ByRef element As DiffResult)
    If (IsNull(list) = True) Then
        ReDim list(0 To 0) As DiffResult
        
        Set list(0) = element
    Else
        ReDim Preserve list(0 To UBound(list) + 1) As DiffResult
        
        Set list(UBound(list)) = element
    End If
End Sub

Public Function Reverse(ByRef old As CommonSubsequence) As CommonSubsequence
    Dim newTop As CommonSubsequence
    Set newTop = Nothing
    
    Dim nxt1 As CommonSubsequence
    While (Not (old Is Nothing))
        Set nxt1 = old.Nxt
        Set old.Nxt = newTop
        Set newTop = old
        Set old = nxt1
    Wend
    
    Set Reverse = newTop
End Function

Public Function PresentDiff(ByRef cs As CommonSubsequence, ByVal wantCommon As Boolean) As DiffResult()
    Dim list() As DiffResult
    
    Dim originalStart1 As Long
    Dim modifiedStart1 As Long
    originalStart1 = 0
    modifiedStart1 = 0
    
    Do While (True)
        Dim d As DiffResult
        
        If (originalStart1 < cs.startA Or modifiedStart1 < cs.startB) Then
            Set d = New DiffResult
            Call d.Construct(True, originalStart1, cs.startA - originalStart1, modifiedStart1, cs.startB - modifiedStart1)
            Call DiffResultListAdd(list, d)
        End If
        
        If (cs.Length = 0) Then
            Exit Do
        End If
        
        originalStart1 = cs.startA
        modifiedStart1 = cs.startB
        
        If (wantCommon) Then
            Set d = New DiffResult
            Call d.Construct(False, originalStart1, cs.Length, modifiedStart1, cs.Length)
            Call DiffResultListAdd(list, d)
        End If
        
        originalStart1 = originalStart1 + cs.Length
        modifiedStart1 = modifiedStart1 + cs.Length
        
        Set cs = cs.Nxt
    Loop

    PresentDiff = list
End Function

Public Function PresentDiffSwap(ByRef cs As CommonSubsequence, ByVal wantCommon As Boolean) As DiffResult()
    Dim list() As DiffResult
    
    Dim originalStart1 As Long
    Dim modifiedStart1 As Long
    originalStart1 = 0
    modifiedStart1 = 0
    
    Do While (True)
        Dim d As DiffResult
        If (originalStart1 < cs.startB Or modifiedStart1 < cs.startA) Then
            Set d = New DiffResult
            Call d.Construct(True, originalStart1, cs.startB - originalStart1, modifiedStart1, cs.startA - modifiedStart1)
            Call DiffResultListAdd(list, d)
        End If
        
        If (cs.Length = 0) Then
            Exit Do
        End If
        
        originalStart1 = cs.startB
        modifiedStart1 = cs.startA
        
        If (wantCommon) Then
            Set d = New DiffResult
            Call d.Construct(False, originalStart1, cs.Length, modifiedStart1, cs.Length)
            Call DiffResultListAdd(list, d)
        End If
        
        originalStart1 = originalStart1 + cs.Length
        modifiedStart1 = modifiedStart1 + cs.Length
        
        Set cs = cs.Nxt
    Loop
    
    PresentDiffSwap = list
End Function


標準モジュール: ModuleTestRun
Option Explicit

Public Sub TestRun()
    Dim ret As Long
    Dim range1 As Range
    Dim range2 As Range
    Dim result() As DiffResult
    Dim i As Long
    
    Set range1 = Sheet1.Cells(1, 1)
    Set range2 = Sheet1.Cells(2, 1)
    
    result = DiffChar(range2.value, range1.value)
    
    For i = LBound(result) To UBound(result)
        If (result(i).Modified = True And result(i).ModifiedLength > 0) Then
            range1.Characters(result(i).ModifiedStart + 1, result(i).ModifiedLength).Font.ColorIndex = 3
        End If
        
        If (result(i).Modified = True And result(i).OriginalLength > 0) Then
            range2.Characters(result(i).OriginalStart + 1, result(i).OriginalLength).Font.ColorIndex = 3
        End If
    Next
End Sub

 

コメント

: 管理人のみ閲覧できます
このコメントは管理人のみ閲覧できます
/ 編集 / 2009年02月03日(火) 14時21分

コメントの投稿

送信時にキーを入力しておくと後でコメントの編集ができます。
管理人にのみ表示する

トラックバック

トラックバックURL:

ブログURL(言及リンク)を含まないトラックバックは受け付けない設定になっています。

プロフィール

かず

かず (プロフィール)

サイト検索

カレンダー

 2017年10月 
1 2 3 4 5 6 7
8 9 10 11 12 13 14
15 16 17 18 19 20 21
22 23 24 25 26 27 28
29 30 31 - - - -

月別アーカイブ

QRコード

QRコード

フィード

RSS 1.0 RSS 1.0

follow us in feedly


スポンサード リンク

Amazon.co.jp

楽天

FC2ブログ

ブログ


Copyright (C) 2017 はッいィ~!? All Rights Reserved.
テンプレート by LinkFly