When receiving files sometimes magically the file get special non-unicode values that causes issues, specially when the wrong code page is giving to the file, after most debate and some denials by the ops dba ( the hate the used of CLR) I create a script that do some quick scrubbing and generate a new file in Ascii format.
#region Help: Introduction to the script task
/* The Script Task allows you to perform virtually any operation that can be accomplished in
* a .Net application within the context of an Integration Services control flow.
*
* Expand the other regions which have "Help" prefixes for examples of specific ways to use
* Integration Services features within this script task. */
#endregion
#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using System.IO;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Text;
#endregion
namespace ST_7707900c427845f180397a51c37ec59f
{
/// <summary>
/// ScriptMain is the entry point class of the script. Do not change the name, attributes,
/// or parent of this class.
/// </summary>
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
#region Help: Using Integration Services variables and parameters in a script
/* To use a variable in this script, first ensure that the variable has been added to
* either the list contained in the ReadOnlyVariables property or the list contained in
* the ReadWriteVariables property of this script task, according to whether or not your
* code needs to write to the variable. To add the variable, save this script, close this instance of
* Visual Studio, and update the ReadOnlyVariables and
* ReadWriteVariables properties in the Script Transformation Editor window.
* To use a parameter in this script, follow the same steps. Parameters are always read-only.
*
* Example of reading from a variable:
* DateTime startTime = (DateTime) Dts.Variables["System::StartTime"].Value;
*
* Example of writing to a variable:
* Dts.Variables["User::myStringVariable"].Value = "new value";
*
* Example of reading from a package parameter:
* int batchId = (int) Dts.Variables["$Package::batchId"].Value;
*
* Example of reading from a project parameter:
* int batchId = (int) Dts.Variables["$Project::batchId"].Value;
*
* Example of reading from a sensitive project parameter:
* int batchId = (int) Dts.Variables["$Project::batchId"].GetSensitiveValue();
* */
#endregion
#region Help: Firing Integration Services events from a script
/* This script task can fire events for logging purposes.
*
* Example of firing an error event:
* Dts.Events.FireError(18, "Process Values", "Bad value", "", 0);
*
* Example of firing an information event:
* Dts.Events.FireInformation(3, "Process Values", "Processing has started", "", 0, ref fireAgain)
*
* Example of firing a warning event:
* Dts.Events.FireWarning(14, "Process Values", "No values received for input", "", 0);
* */
#endregion
#region Help: Using Integration Services connection managers in a script
/* Some types of connection managers can be used in this script task. See the topic
* "Working with Connection Managers Programatically" for details.
*
* Example of using an ADO.Net connection manager:
* object rawConnection = Dts.Connections["Sales DB"].AcquireConnection(Dts.Transaction);
* SqlConnection myADONETConnection = (SqlConnection)rawConnection;
* //Use the connection in some code here, then release the connection
* Dts.Connections["Sales DB"].ReleaseConnection(rawConnection);
*
* Example of using a File connection manager
* object rawConnection = Dts.Connections["Prices.zip"].AcquireConnection(Dts.Transaction);
* string filePath = (string)rawConnection;
* //Use the connection in some code here, then release the connection
* Dts.Connections["Prices.zip"].ReleaseConnection(rawConnection);
* */
#endregion
/// <summary>
/// This method is called when this script task executes in the control flow.
/// Before returning from this method, set the value of Dts.TaskResult to indicate success or failure.
/// To open Help, press F1.
/// </summary>
//Gblobal variables
#region Global Variables
String Archive = "";
String Import = "";
String Load = "";
String Hold = "";
String Scrubber = "";
String Expression = @"[^\u0000-\u007F]";
Encoding ascii = Encoding.ASCII;
int Rows2Process = 100000;
Variables vars = null;
#endregion
public void Main()
{
// TODO: Add your code here
initialized();
MoveFiles(Import, Scrubber);
FileScrubber(Scrubber);
MoveFiles(Hold, Load);
Dts.TaskResult = (int)ScriptResults.Success;
}
// Initialize Variables
private void initialized()
{
Dts.VariableDispenser.LockForRead("User::strArchive");
Dts.VariableDispenser.LockForRead("User::strImport");
Dts.VariableDispenser.LockForRead("User::strLoad");
Dts.VariableDispenser.LockForRead("User::strHold");
Dts.VariableDispenser.LockForRead("User::strScrubber");
Dts.VariableDispenser.GetVariables(ref vars);
Archive = vars["User::strArchive"].Value.ToString();
Import = vars["User::strImport"].Value.ToString();
Load = vars["User::strLoad"].Value.ToString();
Hold = vars["User::strHold"].Value.ToString();
Scrubber = vars["User::strScrubber"].Value.ToString();
}
// Start Scrubbing Mode
private void FileScrubber(String DirectoryPath)
{
List<String> _FileList = GetDirectoryFiles(DirectoryPath);
// Create the new Empty File on the Hold Directory
List<String> _Rows2Clean = new List<string>();
foreach (String FullFilePath in _FileList)
{
int RecordCnt = 0;
String FileName = GetFileName(FullFilePath);
String NewPath = Hold + "\\" + FileName;
// CreateEmptyFile(NewPath);
using (FileStream sf = File.Open(FullFilePath,FileMode.Open,FileAccess.Read,FileShare.ReadWrite))
{
StreamReader sr = new StreamReader(sf);
String row = string.Empty;
while ((row = sr.ReadLine())!= null)
{
_Rows2Clean.Add(row);
RecordCnt++;
if (_Rows2Clean.Count >= Rows2Process)
{
WriteNewFile(ref _Rows2Clean, NewPath);
_Rows2Clean.Clear();
}
}
sf.Close();
sr.Dispose();
}
if (_Rows2Clean.Count >= 1)
{
WriteNewFile(ref _Rows2Clean, NewPath);
_Rows2Clean.Clear();
}
FileName = null;
NewPath = null;
//MessageBox.Show(@"Records Count:" + RecordCnt.ToString() + " FileName " + FileName);
File.Delete(FullFilePath);
}
_FileList.Clear();
_FileList = null;
_Rows2Clean.Clear();
_Rows2Clean = null;
}
private String GetFileName(String FullFilePath)
{
FileInfo _fileInfo = new FileInfo(FullFilePath);
String FileName = String.Empty;
if (_fileInfo.Exists)
{
FileName = _fileInfo.Name.ToString().Trim();
}
_fileInfo = null;
return FileName;
}
private String RemoveUnicode(String inputString)
{
string asAscii = Encoding.ASCII.GetString(
Encoding.Convert(
Encoding.UTF8,
Encoding.GetEncoding(
Encoding.ASCII.EncodingName,
new EncoderReplacementFallback(string.Empty),
new DecoderExceptionFallback()
),
Encoding.UTF8.GetBytes(inputString)
)
);
return asAscii;
}
private void WriteNewFile(ref List<String> RowsToWrite,String FullFilePath)
{
//s = Regex.Replace(s, @"[^A-Za-z0-9]", "");
List<String> NewRows2Write = new List <String>();
foreach (String Row in RowsToWrite)
{
String regRow = Regex.Replace(Row, Expression, "");
NewRows2Write.Add(RemoveUnicode(regRow));
regRow = String.Empty;
}
RowsToWrite.Clear();
using (FileStream sf = File.Open(FullFilePath, FileMode.Append, FileAccess.Write, FileShare.None))
{
using (StreamWriter sw = new StreamWriter(sf))
{
foreach (String newRow in NewRows2Write)
{
sw.WriteLine(newRow);
}
sw.Dispose();
NewRows2Write.Clear();
NewRows2Write = null;
}
sf.Close();
}
}
private void CreateEmptyFile(String FullFilePath)
{
FileInfo _file = new FileInfo(FullFilePath);
if (!_file.Exists)
{
_file.Create();
_file = null;
}
else
{
_file = null;
}
}
// Create a list of files in the Import Folder for scrubbing
private List<String> GetDirectoryFiles(String FullFilePath)
{
String[] fdir = Directory.GetFiles(FullFilePath);
List<String> files = new List<string>(fdir);
fdir = null;
return files;
}
// Move Files to Load Directory
private void MoveFiles (String FromPath,String ToPath)
{
List<String> FromFiles = GetDirectoryFiles(FromPath);
foreach (String FullFilePath in FromFiles)
{
Boolean IsLock = IsFileLocked(FullFilePath);
if (!IsLock)
{
FileInfo _finfo = new FileInfo(FullFilePath);
FileInfo _scF = new FileInfo(ToPath + "\\" + _finfo.Name.ToString());
if (!_scF.Exists)
{
_finfo.MoveTo(_scF.FullName);
}
else
{
File.Delete(_scF.FullName);
_finfo.MoveTo(_scF.FullName);
}
}
}
FromFiles = null;
}
//Check if Files are lock If lock file won't be move to scrubber
protected virtual bool IsFileLocked(String FullFilePath)
{
FileStream stream = null;
try
{
stream = File.Open(FullFilePath,FileMode.Open, FileAccess.ReadWrite, FileShare.None);
}
catch (IOException)
{
//the file is unavailable because it is:
//still being written to
//or being processed by another thread
//or does not exist (has already been processed)
return true;
}
finally
{
if (stream != null)
stream.Close();
}
//file is not locked
return false;
}
#region ScriptResults declaration
/// <summary>
/// This enum provides a convenient shorthand within the scope of this class for setting the
/// result of the script.
///
/// This code was generated automatically.
/// </summary>
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
#endregion
}
}
No comments:
Post a Comment