February 2011


The export to spreadsheet link was not visible in a particular list and we had an urgent request to export data from list to an excel.

So we created a windows application to take care of it.

Added web reference http://extranet/sites/sis-forum/_vti_bin/lists.asmx to the project and named it Sisforum.

Added a textbox txtSiteUrl  in the Windows form…and added the url of the site from where we wanted to export a list to excel e.g https://test-web.domain.org/sites/isago/_vti_bin/lists.asmx , in another textbox txtListName ,gave the name of the list we wanted to export,say “UserManagement Task” and a textbox txtFilepath  for putting the filepath like “c:\test.xls”.

 

            StringBuilder strExcel = new StringBuilder();
            StringBuilder strHtml = new StringBuilder();
            System.Net.NetworkCredential credential = new System.Net.NetworkCredential(“username”, “password”);
            XmlDocument xdoc = new XmlDocument();
            XmlNode allFields = xdoc.CreateElement(“ViewFields”);
            XmlNode QueryOpt = xdoc.CreateElement(“QueryOptions”);
            XmlNode Query = xdoc.CreateElement(“Query”);
            XmlNode xmlResults;
            try
            {
               SisForum.Lists lsts = new ExporttoExcelFile.SisForum.Lists();
                lsts.Credentials = credential;
                lsts.Url = txtSiteUrl.Text+”/_vti_bin/lists.asmx”;
                xmlResults = lsts.GetListItems(txtListName.Text, null, null, null, “1000000”, null, null);
                //Get Header Row
                foreach (XmlNode node in xmlResults)
                {
                    if (node.Name == “rs:data”)
                    {
                        for (int i = 0; i < 2; i++)
                        {
                            strExcel.Append(“\n”);
                           strHtml.Append(“”);
                           if (node.ChildNodes[i].Name == “z:row”)
                            {
              for (int j = 0; j < node.ChildNodes[i].Attributes.Count; j++)
                                {
                                    if (node.ChildNodes[i].Attributes[j] != null)
                                    {
                     string name = node.ChildNodes[i].Attributes[j].Name;
                                        name = StripHTML(name);
                   strExcel.Append(name.Substring(name.IndexOf(‘#’) + 1));
                                        strExcel.Append(“\t”);
                                    }
                                }
                            }
                        }
                    }
                }

                //Get List Items
                foreach (XmlNode node in xmlResults)
                {
                    if (node.Name == “rs:data”)
                    {
                        for (int i = 0; i < node.ChildNodes.Count; i++)
                        {
                            strExcel.Append(“\n”);
                            strHtml.Append(“”);
                            if (node.ChildNodes[i].Name == “z:row”)
                            {  
                                //get list items
          for (int j = 0; j < node.ChildNodes[i].Attributes.Count; j++)
                                {
                                    if (node.ChildNodes[i].Attributes[j] != null)
                                    {
        string name = node.ChildNodes[i].Attributes[j].Value;
                                        name = StripHTML(name);
                   strExcel.Append(name.Substring(name.IndexOf(‘#’) + 1));
                                        strExcel.Append(“\t”);
                                    }
                                }
                            }
                        }
                    }
                }
               WritetoFile(strExcel,”xls”);
               MessageBox.Show(“Completted”);
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }

public void WritetoFile(StringBuilder str,string Format)

        {

            string filename = txtFilepath.Text;

            TextWriter tw = new StreamWriter(filename);

            tw.Write(str);

            tw.Close();

        }

Now the string that we get from the xml by calling GetListItems needs to be formatted.We need to remove HTML development formatting, replace line breaks with space because browsers insert spaces,remove all scripts etc. The below functio takes care of that:

public string StripHTML(string source)

        {

            try

            {

                string result;

                // Remove HTML Development formatting

                // Replace line breaks with space

                // because browsers inserts space

                result = source.Replace(“\r”, ” “);

                // Replace line breaks with space

                // because browsers inserts space

                result = result.Replace(“\n”, ” “);

                // Remove step-formatting

                result = result.Replace(“\t”, string.Empty);

                // Remove repeating spaces because browsers ignore them

result = System.Text.RegularExpressions.Regex.Replace(result,  @”( )+”, ” “);

// Remove the header (prepare first by clearing attributes)

result = System.Text.RegularExpressions.Regex.Replace(result, @”<( )*head([^>])*>”, “<head>”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                result = System.Text.RegularExpressions.Regex.Replace(result,  @”(<( )*(/)( )*head( )*>)”, “</head>”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                result = System.Text.RegularExpressions.Regex.Replace(result,  “(<head>).*(</head>)”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

// remove all scripts (prepare first by clearing attributes)

result = System.Text.RegularExpressions.Regex.Replace(result,    @”<( )*script([^>])*>”, “<script>”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                result = System.Text.RegularExpressions.Regex.Replace(result,  @”(<( )*(/)( )*script( )*>)”, “</script>”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);         

result = System.Text.RegularExpressions.Regex.Replace(result,   @”(<script>).*(</script>)”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

// remove all styles (prepare first by clearing attributes)

result = System.Text.RegularExpressions.Regex.Replace(result,   @”<( )*style([^>])*>”, “<style>”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,    @”(<( )*(/)( )*style( )*>)”, “</style>”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result, “(<style>).*(</style>)”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                // insert tabs in spaces of <td> tags

result = System.Text.RegularExpressions.Regex.Replace(result,  @”<( )*td([^>])*>”, “\t”,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                // insert line breaks in places of <BR> and <LI> tags

result = System.Text.RegularExpressions.Regex.Replace(result,    @”<( )*br( )*>”, “\r”,                          System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,   @”<( )*li( )*>”, “\r”,  System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                // insert line paragraphs (double line breaks) in place

                // if <P>, <DIV> and <TR> tags

result = System.Text.RegularExpressions.Regex.Replace(result,  @”<( )*div([^>])*>”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,  @”<( )*tr([^>])*>”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,   @”<( )*p([^>])*>”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                // Remove remaining tags like <a>, links, images,

                // comments etc – anything that’s enclosed inside < >

result = System.Text.RegularExpressions.Regex.Replace(result, @”<[^>]*>”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                // replace special characters:

result = System.Text.RegularExpressions.Regex.Replace(result,   @” “, ” “,   System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result, @”&bull;”, ” * “,   System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,   @”&lsaquo;”, “<“,  System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,  @”&rsaquo;”, “>”,     System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,  @”&trade;”, “(tm)”,   System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result, @”&frasl;”, “/”,   System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result, @”&lt;”, “<“,    System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result, @”&gt;”, “>”,  System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,   @”&copy;”, “(c)”,    System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,  @”&reg;”, “(r)”,   System.Text.RegularExpressions.RegexOptions.IgnoreCase);

result = System.Text.RegularExpressions.Regex.Replace(result,  @”&(.{2,6});”, string.Empty,                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                return result;

            }

            catch

            {

                return source;

            }

        }

Advertisements

 I had to convert an excel file with different languages into a xml/dataset.

Below is the sample excel sheet I had

English French Spanish Portugese
Guinea Guinéee Guinea Guiné
Côte d’Ívoire Côte d’Ívoire Costa de Marfil Costa do Marfim
South Africa Afrique du Sud Sudáfrica África do Sul

For Excel 97-2003 format Microsoft Jet OLEDB Driver 4.0 is used. A sample connection string as follows.For Excel 2007 format the new Microsoft Ace OLEDB Driver 4.0 is used.

Code to convert is shown below:

//Connection String for Excel 97-2003 Format (.XLS)

String strExcelConn = “Provider=Microsoft.Jet.OLEDB.4.0;”

+ “Data Source=C:\\Gitolekha\\test.xls;

+ “Extended Properties=’Excel 8.0;HDR=Yes’“;

//Connect to excel sheet

OleDbConnection connExcel = new OleDbConnection(strExcelConn);

OleDbDataAdapter da = new OleDbDataAdapter();

OleDbCommand cmdExcel = new OleDbCommand();

cmdExcel.Connection = connExcel;

//Access the sheet

connExcel.Open();

DataTable dtExcelSchema;

dtExcelSchema = connExcel.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

DataSet ds = new DataSet();

string SheetName = dtExcelSchema.Rows[0][“TABLE_NAME“].ToString();//It fetches the first sheet Sheet1$

cmdExcel.CommandText = “SELECT * From [” + SheetName + “]”;

da.SelectCommand = cmdExcel;

da.Fill(ds);

connExcel.Close();

 StringWriter sw = new StringWriter();

ds.WriteXml(sw, XmlWriteMode.IgnoreSchema);

 string textToConvert = sw.ToString();

//Encoding xml as it contains non ASCII characters of different languages 

Encoding latin = Encoding.GetEncoding(28591);

Encoding iso8= Encoding.GetEncoding(“iso-8859-8“);

Byte[] latinBytes= latin.GetBytes(textToConvert);

Byte[] iso8bytes = Encoding.Convert(latin, iso8, latinBytes);

string str = Encoding.UTF8.GetString(iso8bytes);

return str;

There is one point to remember if you are running this on a 64-bit platform:

I kept on getting eror “‘Microsoft.Jet.OLEDB.4.0′ provider is not registered on the local machine“, after much searching on the net, I dicovered :

  • there is no Jet OLEDB provider to use on a 64bit platform

However, we can configure the IIS to run 32-bit application on 64-bit Windows(IIS 6.0), for details, one can reffer the below url’s

http://www.microsoft.com/technet/prodtechnol/WindowsServer2003/Library/IIS/405f5bb5-87a3-43d2-8138-54b75db73aa1.mspx?mfr=true

http://www.microsoft.com/technet/prodtechnol/WindowsServer2003/Library/IIS/13f991a5-45eb-496c-8618-2179c3753bb0.mspx?mfr=true