January 29, 2014

Removing special characters from a File

When receiving files sometimes magically the file get special non-unicode values that causes issues, specially when the wrong code page is giving to the file, after most debate and some denials by the ops dba ( the hate the used of CLR) I create a script that do some quick scrubbing and generate a new file in Ascii format.

#region Help:  Introduction to the script task
/* The Script Task allows you to perform virtually any operation that can be accomplished in
 * a .Net application within the context of an Integration Services control flow.
 *
 * Expand the other regions which have "Help" prefixes for examples of specific ways to use
 * Integration Services features within this script task. */
#endregion


#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using System.IO;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Text;
#endregion

namespace ST_7707900c427845f180397a51c37ec59f
{
    /// <summary>
    /// ScriptMain is the entry point class of the script.  Do not change the name, attributes,
    /// or parent of this class.
    /// </summary>
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
        #region Help:  Using Integration Services variables and parameters in a script
        /* To use a variable in this script, first ensure that the variable has been added to 
         * either the list contained in the ReadOnlyVariables property or the list contained in 
         * the ReadWriteVariables property of this script task, according to whether or not your
         * code needs to write to the variable.  To add the variable, save this script, close this instance of
         * Visual Studio, and update the ReadOnlyVariables and 
         * ReadWriteVariables properties in the Script Transformation Editor window.
         * To use a parameter in this script, follow the same steps. Parameters are always read-only.
         * 
         * Example of reading from a variable:
         *  DateTime startTime = (DateTime) Dts.Variables["System::StartTime"].Value;
         * 
         * Example of writing to a variable:
         *  Dts.Variables["User::myStringVariable"].Value = "new value";
         * 
         * Example of reading from a package parameter:
         *  int batchId = (int) Dts.Variables["$Package::batchId"].Value;
         *  
         * Example of reading from a project parameter:
         *  int batchId = (int) Dts.Variables["$Project::batchId"].Value;
         * 
         * Example of reading from a sensitive project parameter:
         *  int batchId = (int) Dts.Variables["$Project::batchId"].GetSensitiveValue();
         * */

        #endregion

        #region Help:  Firing Integration Services events from a script
        /* This script task can fire events for logging purposes.
         * 
         * Example of firing an error event:
         *  Dts.Events.FireError(18, "Process Values", "Bad value", "", 0);
         * 
         * Example of firing an information event:
         *  Dts.Events.FireInformation(3, "Process Values", "Processing has started", "", 0, ref fireAgain)
         * 
         * Example of firing a warning event:
         *  Dts.Events.FireWarning(14, "Process Values", "No values received for input", "", 0);
         * */
        #endregion

        #region Help:  Using Integration Services connection managers in a script
        /* Some types of connection managers can be used in this script task.  See the topic 
         * "Working with Connection Managers Programatically" for details.
         * 
         * Example of using an ADO.Net connection manager:
         *  object rawConnection = Dts.Connections["Sales DB"].AcquireConnection(Dts.Transaction);
         *  SqlConnection myADONETConnection = (SqlConnection)rawConnection;
         *  //Use the connection in some code here, then release the connection
         *  Dts.Connections["Sales DB"].ReleaseConnection(rawConnection);
         *
         * Example of using a File connection manager
         *  object rawConnection = Dts.Connections["Prices.zip"].AcquireConnection(Dts.Transaction);
         *  string filePath = (string)rawConnection;
         *  //Use the connection in some code here, then release the connection
         *  Dts.Connections["Prices.zip"].ReleaseConnection(rawConnection);
         * */
        #endregion


/// <summary>
        /// This method is called when this script task executes in the control flow.
        /// Before returning from this method, set the value of Dts.TaskResult to indicate success or failure.
        /// To open Help, press F1.
        /// </summary>

        //Gblobal variables
        #region Global Variables
        String Archive = "";
        String Import = "";
        String Load = "";
        String Hold = "";
        String Scrubber = "";
        String Expression = @"[^\u0000-\u007F]";
        Encoding ascii = Encoding.ASCII;
        int Rows2Process = 100000;
        Variables vars = null;
        #endregion
        public void Main()
{
// TODO: Add your code here
            initialized();
            MoveFiles(Import, Scrubber);
           
            FileScrubber(Scrubber);
            MoveFiles(Hold, Load);
           
Dts.TaskResult = (int)ScriptResults.Success;
}

        // Initialize Variables 
        private void initialized()
        {
           
            Dts.VariableDispenser.LockForRead("User::strArchive");
            Dts.VariableDispenser.LockForRead("User::strImport");
            Dts.VariableDispenser.LockForRead("User::strLoad");
            Dts.VariableDispenser.LockForRead("User::strHold");
            Dts.VariableDispenser.LockForRead("User::strScrubber");
            Dts.VariableDispenser.GetVariables(ref vars);
            Archive = vars["User::strArchive"].Value.ToString();
            Import = vars["User::strImport"].Value.ToString();
            Load = vars["User::strLoad"].Value.ToString();
            Hold = vars["User::strHold"].Value.ToString();
            Scrubber = vars["User::strScrubber"].Value.ToString();
        }

        // Start Scrubbing Mode 
        private void FileScrubber(String DirectoryPath)
        {
            List<String> _FileList = GetDirectoryFiles(DirectoryPath);
           // Create the new Empty File on the Hold Directory
            List<String> _Rows2Clean = new List<string>();
            foreach (String FullFilePath in _FileList)
            {
                 int RecordCnt = 0;
                String FileName = GetFileName(FullFilePath);
                String NewPath = Hold + "\\" + FileName;
               // CreateEmptyFile(NewPath);
                using (FileStream sf = File.Open(FullFilePath,FileMode.Open,FileAccess.Read,FileShare.ReadWrite))
                {
                    StreamReader sr = new StreamReader(sf);
                    String row = string.Empty;
                   
                    while ((row = sr.ReadLine())!= null)
                    {
                    
                        _Rows2Clean.Add(row);
                        RecordCnt++;
                        if (_Rows2Clean.Count >= Rows2Process)
                        {
                            WriteNewFile(ref _Rows2Clean, NewPath);
                            _Rows2Clean.Clear();
                        }
                    }
                    sf.Close();
                    sr.Dispose();
                    
                }
                if (_Rows2Clean.Count >= 1)
                {
                    WriteNewFile(ref _Rows2Clean, NewPath);
                    _Rows2Clean.Clear();
                }
                FileName = null;
                NewPath = null;
                //MessageBox.Show(@"Records Count:" + RecordCnt.ToString() + " FileName " + FileName);
                File.Delete(FullFilePath);
            }
            _FileList.Clear();
            _FileList = null;
            _Rows2Clean.Clear();
            _Rows2Clean = null;

        }
        private String GetFileName(String FullFilePath)
        {
            FileInfo _fileInfo = new FileInfo(FullFilePath);
       
                String FileName = String.Empty;
                if (_fileInfo.Exists)
                {
                    FileName = _fileInfo.Name.ToString().Trim();

                }
           
            _fileInfo = null;
            return FileName;
        }
        private String RemoveUnicode(String inputString)
        {
            string asAscii = Encoding.ASCII.GetString(
                                                        Encoding.Convert(
                                                        Encoding.UTF8,
                                                        Encoding.GetEncoding(
                                                        Encoding.ASCII.EncodingName,
                                                        new EncoderReplacementFallback(string.Empty),
                                                        new DecoderExceptionFallback()
                                                        ),
                                                        Encoding.UTF8.GetBytes(inputString)
                                                        )
                                                        );
            return asAscii;
        }
        private void WriteNewFile(ref List<String> RowsToWrite,String FullFilePath)
        {
            //s = Regex.Replace(s, @"[^A-Za-z0-9]", "");
            List<String> NewRows2Write = new List <String>();
            foreach (String Row in RowsToWrite)
            {
                String regRow = Regex.Replace(Row, Expression, "");

                NewRows2Write.Add(RemoveUnicode(regRow));
                regRow = String.Empty;

            }
            RowsToWrite.Clear();

            using (FileStream sf = File.Open(FullFilePath, FileMode.Append, FileAccess.Write, FileShare.None))
            {
                using (StreamWriter sw = new StreamWriter(sf))
                {
                    foreach (String newRow in NewRows2Write)
                    {

                        sw.WriteLine(newRow);

                    }
                    sw.Dispose();
                    NewRows2Write.Clear();
                    NewRows2Write = null;
                }
                sf.Close();


            }
         
           



        }
        private void CreateEmptyFile(String FullFilePath)
        {
            FileInfo _file = new FileInfo(FullFilePath);

            if (!_file.Exists)
            {
                _file.Create();
                _file = null;
                
            }
            else
            {
                _file = null;
            }

        }
        // Create a list of files in the Import Folder for scrubbing

        private List<String> GetDirectoryFiles(String FullFilePath)
        {
          
            String[] fdir = Directory.GetFiles(FullFilePath);
            List<String> files = new List<string>(fdir);

            fdir = null;

            return files;
        }
        
        
        // Move Files to Load Directory
        private void MoveFiles (String FromPath,String ToPath)
        {
            List<String> FromFiles = GetDirectoryFiles(FromPath);
           foreach (String FullFilePath in FromFiles)
            {

                Boolean IsLock = IsFileLocked(FullFilePath);
                if (!IsLock)
                {
                    FileInfo _finfo = new FileInfo(FullFilePath);
                    FileInfo _scF = new FileInfo(ToPath + "\\" + _finfo.Name.ToString());
                    if (!_scF.Exists)
                    {

                        _finfo.MoveTo(_scF.FullName);

                    }
                    else
                    {
                        File.Delete(_scF.FullName);
                        _finfo.MoveTo(_scF.FullName);
                    }

                }
                
                                   
                }
           FromFiles = null;

           
        }
       //Check if Files are lock If lock file won't be move to scrubber
        protected virtual bool IsFileLocked(String FullFilePath)
        {
            FileStream stream = null;

            try
            {
                stream = File.Open(FullFilePath,FileMode.Open, FileAccess.ReadWrite, FileShare.None);
            }
            catch (IOException)
            {
                //the file is unavailable because it is:
                //still being written to
                //or being processed by another thread
                //or does not exist (has already been processed)
                return true;
            }
            finally
            {
                if (stream != null)
                    stream.Close();
            }

            //file is not locked
            return false;
        }

       

        #region ScriptResults declaration
        /// <summary>
        /// This enum provides a convenient shorthand within the scope of this class for setting the
        /// result of the script.
        /// 
        /// This code was generated automatically.
        /// </summary>
        enum ScriptResults
        {
            Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
            Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
        };
        #endregion

}
}

No comments:

Post a Comment

Contact Form

Name

Email *

Message *