Scripts/Update-MongoFiles.ps1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193

<#
.Synopsis
    Updates the file system snapshot database.
 
.Description
    Server: local, database: test, collections: files, files_log
    Module: Mdbc <https://github.com/nightroman/Mdbc>
 
    The script scans the specified directory tree, updates file and directory
    documents, and then removes orphan documents which have not been updated.
    Changes are optionally logged in another collection.
 
    Collection "files"
        * _id : full item path
        * Attributes : file system flags
        * Length : file length
        * LastWriteTime : last write time
        * CreationTime : creation time
        * Name : item name
        * Extension : file extension
        * Updated : last update time
 
    Collection "files_log"
        * _id : full item path
        * Updated : last update time
        * Log : array of item snapshots
        * Op : 0: created, 1: changed, 2: removed
 
.Parameter Path
        Specifies one or more literal directory paths to be processed.
.Parameter CollectionName
        Specifies the collection name. Default: files (implies files_log).
.Parameter Log
        Tells the script to log created, changed, and removed items to the log
        collection (<CollectionName>_log, i.e. files_log by default).
.Parameter Split
        Tells the script to process data in parallel using Split-Pipeline.
        Module: SplitPipeline <https://github.com/nightroman/SplitPipeline>
 
.Inputs
    None. Use the parameters to specify input.
 
.Outputs
    The result object with statistics
        * Path : the input path
        * Created : count of created
        * Changed : count of changed
        * Removed : count of removed
        * Elapsed : elapsed time span
 
.Link
    Get-MongoFile.ps1
#>


param
(
    # One or more directory paths to process; defaults to '.'.
    [Parameter(Position=0)][string[]]$Path = '.',
    # Name of the main collection; the log collection name is this name plus '_log'.
    [string]$CollectionName = 'files',
    # When set, created, changed, and removed items are also written to the log collection.
    [switch]$Log,
    # When set, input items are processed via Split-Pipeline instead of a plain pipeline.
    [switch]$Split
)

# Stop on the first error and enable strict-mode checks.
$ErrorActionPreference = 'Stop'
Set-StrictMode -Version 2
# Single timestamp of this run: it is written to Updated of every touched document
# and later compared with Updated in order to find documents that were not touched.
$Now = [DateTime]::Now

# Resolves exact case paths.
# Walks the path up to its root and rebuilds it from the names reported by the
# file system, so that the result has the same letter case as the items on disk.
# The root part has no parent to enumerate and is converted to upper case.
#   $Path : full path of an existing directory
#   Output: the same path with the exact letter case of each part
#   Throws: if the last part of the path is not found in its parent directory
function Resolve($Path) {
    $directory = [IO.DirectoryInfo]$Path
    $parent = $directory.Parent
    if (!$parent) {
        # root: nothing to look up
        return $directory.Name.ToUpper()
    }

    # ask the parent for the item, the returned name has the actual case
    $found = @($parent.GetFileSystemInfos($directory.Name))
    if ($found.Count -eq 0) {
        # fail with a clear message instead of an obscure null property error
        throw "Cannot resolve path '$Path': '$($directory.Name)' is not found in '$($parent.FullName)'."
    }

    Join-Path (Resolve $parent.FullName) $found[0].Name
}
# Replace each input path with its full provider path in the exact case.
$Path = foreach($item in $Path) {
    $full = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($item)
    Resolve $full
}
Write-Host "Updating data for $Path ..."

# Connects collections and initializes data.
# The script always invokes it dot-sourced (". Connect"), so the variables
# assigned here are created in the invoking scope, where Update and the rest of
# the script use them: $CollectionLog, $info, $Update.
function Connect {
    Import-Module Mdbc
    # connection string ".", database "test", collection $CollectionName
    Connect-Mdbc . test $CollectionName
    # NOTE(review): $Database is not assigned in this script, presumably Connect-Mdbc creates it - confirm
    $CollectionLog = $Database.GetCollection(($CollectionName + '_log'))

    # statistics object; counters start at 0, Path and Elapsed are assigned later by the script
    $info = 1 | Select-Object Path, Created, Changed, Removed, Elapsed
    $info.Created = $info.Changed = $info.Removed = 0
    # update expression which sets Updated to the time of this run
    $Update = New-MdbcUpdate -Set @{Updated = $Now}
}

# Gets input items from the path.
# Outputs all items, including hidden ones, found recursively under $Path.
# Errors of Get-ChildItem are suppressed: 'Ignore' in PowerShell 3+, otherwise 0.
function Input {
    if ($PSVersionTable.PSVersion.Major -ge 3) {
        $action = 'Ignore'
    }
    else {
        $action = 0
    }
    Get-ChildItem -LiteralPath $Path -Force -Recurse -ErrorAction $action
}

# Updates documents from input items.
# Pipeline function: each input object is a file or directory item.
# Uses $Update, $info, $CollectionLog created by the dot-sourced Connect and the
# script variables $Now and $Log.
function Update {process{
    $item = $_
    $isFile = -not $item.PSIsContainer

    # key data which tell that an item has not changed
    $data = New-MdbcData
    $data._id = $item.FullName
    $data.Attributes = [int]$item.Attributes
    if ($isFile) {
        $data.Length = $item.Length
        $data.LastWriteTime = $item.LastWriteTime
    }

    # use the key data as the query and only refresh Updated of the found document
    $result = Update-MdbcData $Update $data -Result
    if ($result.DocumentsAffected) {
        # the document is found, i.e. the item is not changed, done
        return
    }

    # complete the document; for directories LastWriteTime is added only here,
    # it is not part of the above query
    if (-not $isFile) {
        $data.LastWriteTime = $item.LastWriteTime
    }
    $data.CreationTime = $item.CreationTime
    $data.Name = $item.Name
    if ($isFile) {
        $data.Extension = $item.Extension
    }
    $data.Updated = $Now

    # save the document and count it: 0 ~ created, 1 ~ changed
    $result = Add-MdbcData $data -Update -Result
    $op = [int]$result.UpdatedExisting
    if ($op -eq 0) {
        ++$info.Created
    }
    else {
        ++$info.Changed
    }

    if ($Log) {
        # log created or changed: the snapshot is the document without _id, Name, Extension
        foreach($key in '_id', 'Name', 'Extension') {
            $data.Remove($key)
        }
        $data.Op = $op
        $logUpdate = New-MdbcUpdate -Set @{Updated = $Now; Op = $op} -Push @{Log = $data}
        Update-MdbcData -Collection $CollectionLog -Add -Query $item.FullName -Update $logUpdate
    }
}}

### Update existing
# Dot-source Connect in order to get $info, $Update, $CollectionLog in this scope.
. Connect
$info.Path = $Path
$time = [Diagnostics.Stopwatch]::StartNew()
if (!$Split) {
    # plain pipeline in this session
    Input | Update
}
else {
    Import-Module SplitPipeline

    # Each parallel pipeline connects on its own, updates its part of input, and
    # returns its own $info; these counters are added to $info of this session.
    $splitParameters = @{
        'Verbose' = $true
        'Count' = 2, 4
        'Load' = 500, 5000
        'Function' = 'Connect', 'Update'
        'Variable' = 'CollectionName', 'Log', 'Now'
        'Begin' = { . Connect }
        'Script' = { $input | Update }
        'End' = { $info }
    }
    Input | Split-Pipeline @splitParameters | ForEach-Object {
        $info.Created += $_.Created
        $info.Changed += $_.Changed
    }
}

### Remove missing
# One anchored regex per processed path (ending with a backslash); it is matched
# against _id, i.e. the full path, of documents.
$in = foreach($root in $Path) {
    $prefix = if ($root.EndsWith('\')) {$root} else {$root + '\'}
    [regex]('^' + [regex]::Escape($prefix))
}

# documents with Updated which is not of BSON type 9 (date)
$queryUnknown = New-MdbcQuery -Not (New-MdbcQuery Updated -Type 9)
# documents under the processed paths with Updated older than this run
$queryMissing = New-MdbcQuery -And (New-MdbcQuery _id -In $in), (New-MdbcQuery Updated -LT $Now)
$queryRemove = New-MdbcQuery -Or $queryUnknown, $queryMissing

foreach($document in Get-MdbcData $queryRemove) {
    ++$info.Removed

    # remove data
    $id = $document._id
    Remove-MdbcData $id

    if (!$Log) {continue}

    # log removed: the snapshot is the removed document without _id, Name, Extension
    foreach($key in '_id', 'Name', 'Extension') {
        $document.Remove($key)
    }
    $document.Op = 2
    $logUpdate = New-MdbcUpdate -Set @{Updated = $Now; Op = 2} -Push @{Log = $document}
    Update-MdbcData -Collection $CollectionLog -Add -Query $id -Update $logUpdate
}

# output info
# Record the time elapsed since the stopwatch was started and write the
# statistics object (Path, Created, Changed, Removed, Elapsed) as the script result.
$info.Elapsed = $time.Elapsed
$info