# Scripts/Update-MongoFiles.ps1

<#
.Synopsis
    Updates the file system snapshot database.

.Description
    Server: local, database: test, collections: files, files_log
    Module: Mdbc <https://github.com/nightroman/Mdbc>

    The script scans the specified directory tree, updates file and directory
    documents, and then removes orphan documents which have not been updated.
    Changes are optionally logged in another collection.

    Collection "files"
        * _id : full item path
        * Attributes : file system flags
        * Length : file length
        * LastWriteTime : last write time
        * CreationTime : creation time
        * Name : item name
        * Extension : file extension
        * Updated : last update time

    Collection "files_log"
        * _id : full item path
        * Updated : last update time
        * Log : array of item snapshots
        * Op : 0: created, 1: changed, 2: removed

.Parameter Path
        Specifies one or more literal directory paths to be processed.
.Parameter CollectionName
        Specifies the collection name. Default: files (implies files_log).
.Parameter Log
        Tells to log created, changed, and removed items to files_log.
.Parameter Split
        Tells to perform parallel data processing using Split-Pipeline.
        Module: SplitPipeline <https://github.com/nightroman/SplitPipeline>

.Inputs
    None. Use the parameters to specify input.

.Outputs
    The result object with statistics
        * Path : the input path
        * Created : count of created
        * Changed : count of changed
        * Removed : count of removed
        * Elapsed : elapsed time span

.Link
    Get-MongoFile.ps1
#>

param
(
    [Parameter(Position=0)][string[]]$Path = '.',
    [string]$CollectionName = 'files',
    [switch]$Log,
    [switch]$Split
)

# Treat every error as terminating and catch typos in variable/property names early.
$ErrorActionPreference = 'Stop'
Set-StrictMode -Version 2

# Single timestamp for the whole run: it is written to the Updated field of every
# touched document and later compared against to find documents that were not touched.
$Now = [DateTime]::Now

# Resolves exact case paths.
# Returns $Path rewritten with the letter case reported by the file system.
# Each path segment is looked up in its parent directory and replaced by the
# name the file system returns; the root segment is simply upper-cased.
# Throws a terminating error naming the first segment that does not exist
# (previously a missing item surfaced as an unrelated strict-mode error about
# a missing 'Name' property).
function Resolve($Path) {
    $directory = [IO.DirectoryInfo]$Path
    if ($directory.Parent) {
        # Resolve the ancestors first so that the topmost missing segment is reported.
        $parent = Resolve $directory.Parent.FullName

        # @() guarantees an array even for zero or one match.
        $found = @($directory.Parent.GetFileSystemInfos($directory.Name))
        if ($found.Count -eq 0) {
            throw "Cannot resolve path '$Path': the item does not exist."
        }
        Join-Path $parent $found[0].Name
    }
    else {
        # No parent: this is the root, e.g. a drive.
        $directory.Name.ToUpper()
    }
}

# Convert the input paths to full provider paths with exact case.
$Path = foreach($_ in $Path) {
    Resolve ($PSCmdlet.GetUnresolvedProviderPathFromPSPath($_))
}
Write-Host "Updating data for $Path ..."

# Connects collections and initializes data.
# Must be dot-sourced (". Connect"): the variables assigned here ($CollectionLog,
# $info, $Update) are then created in the caller's scope and used by Update and
# by the main script. Reads $CollectionName and $Now from the caller.
# NOTE(review): $Database is presumably set by Connect-Mdbc - confirm against Mdbc docs.
function Connect {
    Import-Module Mdbc
    Connect-Mdbc . test $CollectionName
    $CollectionLog = $Database.GetCollection(($CollectionName + '_log'))

    # Statistics object; counters start at 0, Path and Elapsed are filled in later.
    $info = 1 | Select-Object Path, Created, Changed, Removed, Elapsed
    $info.Created = $info.Changed = $info.Removed = 0

    # Reusable update expression which only stamps Updated with this run's time.
    $Update = New-MdbcUpdate -Set @{Updated = $Now}
}

# Gets input items from the path.
# Emits all items under $Path recursively, including hidden ones (-Force).
# Enumeration errors are suppressed: 'Ignore' on PowerShell 3+, otherwise 0
# (the numeric value of SilentlyContinue).
function Input {
    $ea = if ($PSVersionTable.PSVersion.Major -ge 3) {'Ignore'} else { 0 }
    Get-ChildItem -LiteralPath $Path -Force -Recurse -ErrorAction $ea
}

# Updates documents from input items.
# Pipeline function: processes one file system item ($_) per call. Uses $Update,
# $info, $CollectionLog, $Log and $Now from the enclosing scope (see Connect).
function Update {process{
    $file = !$_.PSIsContainer

    # main data
    # These fields double as the "unchanged" test below. Length and
    # LastWriteTime are included only for files.
    $data = New-MdbcData
    $data._id = $_.FullName
    $data.Attributes = [int]$_.Attributes
    if ($file) {
        $data.Length = $_.Length
        $data.LastWriteTime = $_.LastWriteTime
    }

    # query by main data and update Updated
    $r = Update-MdbcData $Update $data -Result

    # updated means not changed, done
    if ($r.DocumentsAffected) {return}

    # more data
    if (!$file) {
        $data.LastWriteTime = $_.LastWriteTime
    }
    $data.CreationTime = $_.CreationTime
    $data.Name = $_.Name
    if ($file) {
        $data.Extension = $_.Extension
    }
    $data.Updated = $Now

    # add or update data
    # $op becomes 1 when an existing document was replaced (changed), 0 when a
    # new one was added (created) - the same codes as the Op field in the log.
    $r = Add-MdbcData $data -Update -Result
    $op = [int]$r.UpdatedExisting
    if ($op) {
        ++$info.Changed
    }
    else {
        ++$info.Created
    }
    if (!$Log) {return}

    # log created or changed
    # The snapshot pushed to Log omits _id, Name and Extension; the log
    # document itself is addressed by the full item path.
    $data.Remove('_id')
    $data.Remove('Name')
    $data.Remove('Extension')
    $data.Op = $op
    Update-MdbcData -Collection $CollectionLog -Add -Query $_.FullName -Update (
        New-MdbcUpdate -Set @{Updated = $Now; Op = $op} -Push @{Log = $data}
    )
}}

### Update existing

# Dot-source so that Connect's variables are created in the script scope.
. Connect
$info.Path = $Path
$time = [Diagnostics.Stopwatch]::StartNew()

if ($Split) {
    # Parallel mode: each Split-Pipeline worker connects on its own (-Begin),
    # updates its share of items (-Script) and finally emits its own statistics
    # object (-End). The trailing dot-sourced block adds those per-worker
    # counters to this script's $info.
    Import-Module SplitPipeline
    Input | Split-Pipeline -Verbose -Count 2, 4 -Load 500, 5000 -Function Connect, Update -Variable CollectionName, Log, Now `
    -Begin { . Connect } -Script { $input | Update } -End { $info } | .{process{
        $info.Created += $_.Created
        $info.Changed += $_.Changed
    }}
}
else {
    Input | Update
}

### Remove missing

# One anchored, escaped regex per input path, matching ids located under that
# path (a trailing '\' is appended when missing).
$in = foreach($_ in $Path) {
    if (!$_.EndsWith('\')) {$_ += '\'}
    [regex]('^' + [regex]::Escape($_))
}

# Documents whose Updated is not of BSON type 9 (date) ...
$queryUnknown = New-MdbcQuery -Not (New-MdbcQuery Updated -Type 9)
# ... or documents under the processed paths which were not stamped in this run.
$queryMissing = New-MdbcQuery -And (New-MdbcQuery _id -In $in), (New-MdbcQuery Updated -LT $Now)

foreach($data in Get-MdbcData (New-MdbcQuery -Or $queryUnknown, $queryMissing)) {
    ++$info.Removed

    # remove data
    $id = $data._id
    Remove-MdbcData $id

    # log removed
    if ($Log) {
        # Same snapshot shape as for created/changed items, with Op = 2 (removed).
        $data.Remove('_id')
        $data.Remove('Name')
        $data.Remove('Extension')
        $data.Op = 2
        Update-MdbcData -Collection $CollectionLog -Add -Query $id -Update (
            New-MdbcUpdate -Set @{Updated = $Now; Op = 2} -Push @{Log = $data}
        )
    }
}

# output info
$info.Elapsed = $time.Elapsed
$info