Friday, December 2, 2011

Automatically Load Text/Excel files in given folder

Today Morning I went to Office as usual, But unusually I got a call from my friend and asked me for a help on SQL. He has 2000 excel files in one folder and he want to load all these files into Database and he doesn't have schema for the tables for upfront. So he asked me to create schema for each table automatically/accordingly while loading data into each table.
This below Stored proc will solve that problem.
CREATE PROCEDURE [dbo].[IMPORTING_FILES_FROM_FOLDER]
(
@FOLDER_NAME VARCHAR(1024),----FOLDER PATH WHERE your files resides
@NEW_FOLDER VARCHAR(1024)=NULL,---Folder path, this is optional.Once files loaded it will move to this folder
@FILE_PATTERN VARCHAR(50)='',----what type of files you want to load
@CREATE_TABLE_AND_LOAD BIT=0,----create table and load the data
@TRUNCATE BIT=0,-----Before load truncate the table
@MOVE_TO_NEWFOLDER BIT=0----Move to new folder or not
)
/*
USAGE:::
EXEC IMPORTING_FILES_FROM_FOLDER
@FOLDER_NAME=D:\Folder0,
@NEW_FOLDER='D:\Folder0\folder01',
@FILE_PATTERN='*.xls'
@CREATE_TABLE_AND_LOAD=1,
@MOVE_TO_NEWFOLDER=1
*/
AS
BEGIN
BEGIN TRY
SET NOCOUNT ON
DECLARE @DATE VARCHAR(20),@NLN VARCHAR(4), @NRROWS INT
SELECT @DATE = CAST(GETDATE() AS VARCHAR(20)),@NLN=''+CHAR(10)+' '
DECLARE @ERRMSG NVARCHAR(4000), @ERRSEV INT, @ERRST INT, @ERRPROC VARCHAR(126),@ERRLN INT, @ERRNR INT, @ERRLOC NVARCHAR(300)
DECLARE @COMMAND VARCHAR(200),@FILE_NAME VARCHAR(100),@FULL_PATH VARCHAR(1054),@SQL VARCHAR(4000),@TABLE_NAME VARCHAR(256)

IF @FOLDER_NAME IS NULL or @FILE_PATTERN IS NULL
RAISERROR('Input parameters foldername or filepattern cannot be null',16,-1) with nowait

IF RIGHT(@FOLDER_NAME, 1)<>'\'
SET @FOLDER_NAME =@FOLDER_NAME +'\'

IF OBJECT_ID(N'TEMPDB..#FILELIST') IS NOT NULL
DROP TABLE #FILELIST
CREATE TABLE #FILELIST([FILENAME] NVARCHAR(500))

SELECT @COMMAND= 'DIR "'+ @FOLDER_NAME + @FILE_PATTERN +'" /B /A:-D /B /O:N'

INSERT INTO #FILELIST EXEC XP_CMDSHELL @COMMAND

DELETE FROM #FILELIST WHERE [FILENAME] IS NULL OR [FILENAME] = 'FILE NOT FOUND'

IF (CURSOR_STATUS('GLOBAL', 'C_LOAD') >= 0 OR CURSOR_STATUS('LOCAL', 'C_LOAD') >= 0)
BEGIN CLOSE C_LOAD; DEALLOCATE C_LOAD END

DECLARE C_LOAD CURSOR FOR SELECT [FILENAME] FROM #FILELIST
OPEN C_LOAD
FETCH NEXT FROM C_LOAD INTO @FILE_NAME
WHILE @@FETCH_STATUS=0
BEGIN
RAISERROR('PROCESSING FILE %S',0,1,@FILE_NAME) WITH NOWAIT
SET @FULL_PATH=@FOLDER_NAME+@FILE_NAME
SELECT @TABLE_NAME=SUBSTRING(@FILE_NAME,1,CHARINDEX('.',@FILE_NAME)-1)
      IF @CREATE_TABLE_AND_LOAD=1
BEGIN
IF CHARINDEX('.xls',@FILE_NAME)>0
BEGIN
SELECT @SQL ='
SELECT * INTO '+@TABLE_NAME+' FROM OPENDATASOURCE
(''Microsoft.jet.OLEDB.4.0'',''data source='+@FULL_PATH+';Extended properties=EXCEl 5.0'')...[Sheet1$]'
END

IF CHARINDEX('.csv',@FILE_NAME)>0 or      
        CHARINDEX('.txt',@FILE_NAME)>0

            BEGIN

SELECT @SQL ='
SELECT * INTO '+@TABLE_NAME+' FROM OPENDATASOURCE
(''Microsoft.jet.OLEDB.4.0'',''data source='+@FOLDER_NAME+';Extended properties=Text;'')...['+Replace(@FILE_NAME,'.','#')+']'

END
PRINT @SQL
EXEC(@SQL)
END ELSE BEGIN
IF @TRUNCATE=1
            BEGIN
SELECT @SQL='TRUNCATE TABLE '+@TABLE_NAME
PRINT @SQL
EXEC(@SQL)
END

SELECT @SQL = '
BULK INSERT TGI_DBSNP.DBO.'+@TABLE_NAME+' FROM '''+@FULL_PATH+'''
WITH (CODEPAGE=''OEM'',
BATCHSIZE=100000,
FIELDTERMINATOR =''\T'',
FIRSTROW=2,
MAXERRORS=0,
ROWTERMINATOR='''+CHAR(10)+''')
'
PRINT @SQL
EXEC(@SQL)
END

IF @MOVE_TO_NEWFOLDER=1
BEGIN
IF @NEW_FOLDER IS NULL
BEGIN
RAISERROR('NewFolder Parameter cannot null for moving to new folder',16,-1) WITH NOWAIT
END
IF RIGHT(@NEW_FOLDER, 1)<>'\'
SET @NEW_FOLDER =@NEW_FOLDER +'\'
SELECT @COMMAND= 'MOVE '+ @FOLDER_NAME+@FILE_NAME +' '+ @NEW_FOLDER+@FILE_NAME+''
PRINT 'EXEC XP_CMDSHELL ''' +@COMMAND+'''
EXEC XP_CMDSHELL @COMMAND
END
FETCH NEXT FROM C_LOAD INTO @FILE_NAME
END
CLOSE C_LOAD
DEALLOCATE C_LOAD

END TRY

BEGIN CATCH
SELECT @ERRMSG =ERROR_MESSAGE(), @ERRSEV =ERROR_SEVERITY(), @ERRST =ERROR_STATE(), @ERRLN = ERROR_LINE(), @ERRNR = ERROR_NUMBER(),@FILE_NAME=ISNULL(@FILE_NAME,'')
RAISERROR('FAILED TO LOAD FILE %s :::ERROR NR %d, STATE %d, SEV %d; LINE %d %s WITH MSG: %s',16,-1, @FILE_NAME, @ERRNR,@ERRST,@ERRSEV,@ERRLN,@NLN,@ERRMSG)
END CATCH
END


Thursday, December 1, 2011

Querying/Reading on Text/Excel files using Opendatasource


Microsoft Sql Server has provided numerous ways to importing textfile into database, like Bulkinsert, Import/export wizard,
DTS (Sql 2000), SSIS (sql 2005/2008). With above all methods if you want Query on the data, you need to first load the into database.
But using open datasource method, you can query on the file without loading into database.
This way we have full control on  the file using t-sql.

Ex:1)
We have excel file with 2 column id and name
Here is the original data in excel file::

---If we want to see the data in Querywindow using t-sql( Make sure don't open excel file while executing)
select * from OPENDATASOURCE('Microsoft.jet.OLEDB.4.0','datasource=C:\test.xls;Extended properties=EXCEl 5.0')...[one$]






---You can even query on the file using where caluse
select * from OPENDATASOURCE('Microsoft.jet.OLEDB.4.0','data source=C:\test.xls;Extended properties=EXCEl 5.0')...[one$]
where id=3





select * from OPENDATASOURCE('Microsoft.jet.OLEDB.4.0','data source=C:\test.xls;Extended properties=EXCEl 5.0')...[one$]
where name='abc'



Ex:2)
Same way we have comma separated text file with id and name columns.



Access text file using MSDASQL provider or Jet procider
SELECT * FROM OPENROWSET ('MSDASQL', 'Driver={Microsoft Text Driver (*.txt; *.csv)};DEFAULTDIR=C:\', 'SELECT * from test1.txt')
(OR)
select * from OPENDATASOURCE('Microsoft.jet.OLEDB.4.0','data source=C:\;Extended properties=Text;')...[test1#txt]






Note::
If the text file is not a comma separated, then above is not sufficient.You have to create schema.ini file in the same folder where your file resides


The one more advantage of this Opendatasource is, in upfront we don't need to create table with required columns.

Ex: This below Query create table with required columns and load the data into test1 table

select *
into test1
from OPENDATASOURCE('Microsoft.jet.OLEDB.4.0','data source=C:\test.xls;Extended properties=EXCEl 5.0')...[one$]


 

Monday, November 7, 2011

Creating Linked Server To MYSQL from Ms SQL Server and importing tables from MYSQL

One day I am seriously working on one project with Ms SQL server, Suddenly my manager jumped into my cabin and he asked me to import all the tables from MYSQL to SQL server.I never worked on MYSQL, and don't want to install any MYSQL client tools and export data into text files and load into SQL server.I thought of making easy and automate this process.
I started looking for available OLE DB provider for MYSQL, but I didn't find that.So I found an alternative way to connection to MYSQL from MS SQL server.

A provider is needed for any data source that is to be exposed through OLE DB. Microsoft releases providers for several common data sources, including SQL Server, Oracle, and Jet. In addition, Microsoft ships a special provider called the “OLE DB Provider for ODBC Drivers”. This was the very first OLE DB provider ever released, and is also known as “MSDASQL”, due to the fact that it is contained in “MSDASQL.DLL”.
MSDASQL allows OLE DB consumer applications to utilize ODBC drivers to connect to a data source. The provider converts incoming OLE DB calls into ODBC calls, and passes them on to the specified ODBC driver. It then retrieves results from the ODBC driver and formats those into OLE DB specific structures which the consumer then has access to.

To achieve this first you need to install "OLEDB provider for ODBC driver" and "MYSQL ODBC connector".
Download paths for above two components::
OLEDB provider for ODBC driver:::http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=20065
MYSQL ODBC connector::http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=20065
Once you install these two components, please check they are installed or not.
For OLEDB provider for ODBC driver, GO SSMS( Sql server Management studio), expand Server objects,
then expand Linked servers, then Providers, you can see "MSDASQL"


For MYSQL ODBC connector::
goto start--->RUN--->type ODBCAD32 it will open below window.
then go to drivers tab, there you can see installed driver.

Once you have provider and ODBC connector, create Linked server between MSSQL to MYSQL.



EXEC master.dbo.sp_addlinkedserver

@server = N'MYSQL',

@srvproduct=N'MySQL',

@provider=N'MSDASQL',

@provstr=N'DRIVER={MySQL ODBC 5.1 Driver}; SERVER= server name;

DATABASE=db_name; USER=XXX; PASSWORD= XXX_XXX; OPTION=3'

Once you have this successfully configured, Use below SP to import data from MYSQL server.

USE DB_Name
GO
/****** Object: StoredProcedure [dbo].[spGenerating_gene_leveldata_from_reporter_leveldata] Script Date: 01/31/2011 15:44:11 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
IF OBJECT_ID('[dbo].[sp_import_data_from_MYSQL]','P') is not null
drop procedure [dbo].[sp_import_data_from_MYSQL]
GO
CREATE PROCEDURE [dbo].[sp_import_data_from_MYSQL]
/*
purpose: This stored proc is used import data from MYSQL to MS SQL server
This stored proc produce some chucks, which we execute in different windows to increase the performance.
*/
AS
BEGIN
BEGIN try
SET NOCOUNT ON
declare @procName varchar(100), @dbName varchar(256), @date varchar(20), @ntab varchar(200), @nln varchar(4),@user varchar(56),@rows int
DECLARE @ErrMsg nvarchar(4000), @ErrSev varchar(3), @ErrSt varchar(4), @ErrProc nvarchar(126),@ErrLn nvarchar(10), @ErrNr nvarchar(10), @ErrLoc nvarchar(300)
select @procName=DB_NAME()+'.'+ss.name+'.'+so.name from sys.objects so, sys.schemas ss
where so.object_id = @@PROCID and so.schema_id=ss.schema_id
select @date = cast(getDate() as varchar(20)),@nln=''+char(10)+' ', @dbName=DB_NAME(),@ntab=@procName+'> ', @user=suser_sname(),@rows=0
DECLARE @tableName varchar(256),@databaseName varchar(256),@table_id int,@SQL Nvarchar(1024),@ParmDefinition nvarchar(500),@SQl_TXT varchar(MAX)
SELECT @databaseName='exome',@table_id=1
raiserror('Entering stored procedure "%s" at "%s"',0,1,@procName,@date) with nowait
IF (Cursor_Status('global', 'c_tables') >= 0 or Cursor_Status('local', 'c_tables') >= 0)
begin close c_tables; deallocate c_tables end
declare c_tables cursor for select Tables_in_exome FROM openquery(MYSQL, 'SHOW TABLES IN exome')
open c_tables
fetch next from c_tables into @tableName
while @@FETCH_STATUS=0
BEGIN
      DECLARE @cnt int=0
raiserror('%d.) Loading data from table%s',0,1,@table_id,@tableName) with nowait
SELECT @SQL='
SELECT @cntOUT=cnt FROM openquery(MYSQL, ''SELECT COUNT(*) as cnt FROM '+@databaseName+'.'+@tableName+''')'
SET @ParmDefinition = N'@cntOUT int OUTPUT'
EXECUTE sp_executesql @SQL, @ParmDefinition, @cntOUT=@cnt OUTPUT;
raiserror('Number of rows in %s are %d ',0,1,@tableName,@cnt) with nowait
if @cnt>10000
BEGIN
raiserror('loading in batches (each batch it will load 10000 records)',0,1)
DECLARE @start int, @end int
SELECT @start=0,@end=10000
WHILE((@cnt+10000)>@start)
BEGIN
if @start=0
BEGIN
SELECT @SQl_TXT='SELECT * INTO '+@dbName+'.'+@databaseName+'.'+@tableName+' FROM openquery(MYSQL, ''SELECT * FROM '+@databaseName+'.'+@tableName+' LIMIT '+convert(varchar(20),@start)+' ,'+convert(varchar(20),@end)+''')
GO'

print @SQl_TXT
exec(@SQl_TXT)

END ELSE BEGIN

SELECT @SQl_TXT='INSERT INTO '+@dbName+'.'+@databaseName+'.'+@tableName+' SELECT * FROM openquery(MYSQL, ''SELECT * FROM '+@databaseName+'.'+@tableName+' LIMIT '+convert(varchar(20),@start)+' ,'+convert(varchar(20),@end)+''')
GO'
print @SQl_TXT
exec(@SQl_TXT)

END

select @start=@start+10000
--select @end=@end+10000

END

END ELSE BEGIN
SELECT @SQl_TXT='SELECT * INTO '+@dbName+'.'+@databaseName+'.'+@tableName+' FROM openquery(MYSQL, ''SELECT * FROM '+@databaseName+'.'+@tableName+''')
GO'
print @SQl_TXT
exec(@SQl_TXT)

END

raiserror('%s%s',0,1,@nln,@nln) with nowait
SET @table_id=@table_id+1    

fetch next from c_tables into @tableName

END
CLOSE c_tables
DEALLOCATE c_tables

END TRY
BEGIN CATCH
select @ErrMsg =ERROR_MESSAGE(), @ErrSev =ERROR_SEVERITY(), @ErrSt =ERROR_STATE(),@ErrProc =ERROR_PROCEDURE(), @ErrLn = ERROR_LINE(), @ErrNr = ERROR_NUMBER()
raiserror('Failed to execute procname %s, %s Error nr %s, state %s, sev %s; line %s, %s with msg: %s',16,-1,@procName, @nln,@ErrNr,@ErrSt,@ErrSev,@ErrLn,@nln,@ErrMsg)
END CATCH
END
GO