一个开源的 ASP.NET 2.0 博客系统
功能是:输入一个城市或地区名称后,提取该地区所有的企业信息。
刚刚总算完成了个大概,用了一天多一点的时间,主要代码如下:
- void CCompanyInformationView::OnCollectInformation()
- {
- CString strURL="http://www.hengzhe.com/Company/gs.html?q=";
- CWTString wts;
- CString str ;
- CString LinkSavePath="E:\\LinkSave.txt";
- CStdioFile myFile;
- myFile.Open(LinkSavePath,CFile::modeCreate|CFile::modeReadWrite);
- myFile.Close();
- m_bar.GetDlgItemText(idcArea,str);
- strURL+=wts.URLEncode(wts.GB2312ToUTF8(str));
- strURL+="&ar=%e5%85%a8%e5%9b%bd";
- LoopPageLink(strURL);
- }
- //-----递归取页数
- void CCompanyInformationView::LoopPageLink(CString strURL)
- {
- CString strInitURL=strURL;
- CWTString wts;
- CString str ;
- GetPageLink(strURL);
- if(ant.Request("GET",strInitURL)<1){
- CString strError = ant.GetError();
- return ;
- }
- str=wts.UTF8ToGB2312(ant.m_strBody);
- WriteFileText("E:\\Company.htm",str);
- reader.SetHtml(str);
- LPCTSTR lpTag;
- CString strValue,strName,strInside;
- /*for(;;)
- {
- lpTag=reader.GetNextTag();
- strName=reader.GetTagName(lpTag);
- if(strName!="a") continue;
- strInside=reader.GetInnerHtml(lpTag);
- strValue=reader.GetTagAttribute(lpTag,"href");
- if(strValue.Find("gs.html?")<0) continue;
- str="http://www.hengzhe.com/Company/"+strValue;
- GetPageLink(str);
- break;
- }*/
- for(;;)
- {
- lpTag=reader.GetNextTag();
- strName=reader.GetTagName(lpTag);
- if(strName!="a") continue;
- if(strName.Compare("/html")==0) break;
- strInside=reader.GetInnerHtml(lpTag);
- if(strInside.Compare("下一页")!=0) continue;
- strValue=reader.GetTagAttribute(lpTag,"href");
- strValue="http://www.hengzhe.com/Company/"+strValue;
- LoopPageLink(strValue);
- break;
- }
- }
- //----------------------------------取一个页面链接的所有公司
- void CCompanyInformationView::GetPageLink(CString strURL)
- {
- CWTString wts;
- CString str ;
- WriteFileText("E:\\kankan.txt",strURL);
- if(ant.Request("GET",strURL)<1){
- CString strError = ant.GetError();
- return ;
- }
- str=wts.UTF8ToGB2312(ant.m_strBody);
- CString LinkSavePath="E:\\LinkSave.txt";
- CStdioFile myFile;
- myFile.Open(LinkSavePath,CFile::modeReadWrite);
- reader.SetHtml(str);
- LPCTSTR lpTag;
- CString strValue,strName,strInside;
- for(;;)
- {
- lpTag=reader.GetNextTag();
- strName=reader.GetTagName(lpTag);
- if(strName.Compare("/html")==0) break;
- if(strName!="a") continue;
- strValue=reader.GetTagAttribute(lpTag,"href");
- if(strValue.Find("vi.html?")<0) continue;
- str="http://www.hengzhe.com/Company/"+strValue;
- myFile.WriteString("\r\n");
- myFile.WriteString(str);
- }
- myFile.Close();
- SaveData();
- }
- //----------------------------------
- void CCompanyInformationView::SaveData()//数据库操作将某个企业基本信息保存到数据库
- {
- CWTString wts;
- CString str;
- CSQLInsert sql;
- CString strURL;
- CStdioFile myFile;
- myFile.Open("E:\\LinkSave.txt",CFile::modeRead);
- myFile.ReadString(strURL);
- for(;;)
- {
- myFile.ReadString(strURL);
- if (strURL.IsEmpty()) break;
- if(ant.Request("GET",strURL)<1){
- CString strError = ant.GetError();
- return ;
- }
- sql.SetTableName("CompanyInformation");
- int index=dbGetLastKeyId(m_pDB,"CompanyInformation")+1;
- sql.SetFieldLong("KeyId",index);
- LPCTSTR lpTag;
- CString strValue,strName,strInside;
- int at1,at2;
- reader.SetHtml(wts.UTF8ToGB2312(ant.m_strBody));
- for(;;)
- {
- lpTag=reader.GetNextTag();
- strName=reader.GetTagName(lpTag);
- if(strName.Compare("/body")==0) break;
- if(strName.Compare("td")!=0) continue;
- strValue=reader.GetTagAttribute(lpTag,"width");
- if(strValue.Compare("70%")==0) continue;
- strInside=reader.GetInnerHtml(lpTag);
- if(strInside.Find("公司名称:")>=0)
- {
- strInside.TrimRight();
- at1=strInside.Find(":");
- str=strInside.Mid(at1+2,strInside.GetLength()-at1+3);
- str.TrimLeft();
- //str.TrimLeft(":");
- sql.SetFieldText("Customer",str);
- }
- else if(strInside.Find("所在区域:")>=0)
- {
- strInside.TrimRight();
- if(strInside.GetLength()<=5) continue;
- at1=strInside.Find(":");
- at2=strInside.Find(".");
- str=strInside.Mid(at1+2,at2-at1-2);
- sql.SetFieldText("Province",str);
- str=strInside.Mid(at2+1,strInside.GetLength()-at2+1);
- str.TrimLeft();
- sql.SetFieldText("City",str);
- }
- else if(strInside.Find("联 系 人:")>=0)
- {
- strInside.TrimRight();
- if(strInside.GetLength()<=6) continue;
- at1=strInside.Find(":");
- str=strInside.Mid(at1+2,strInside.GetLength()-at1);
- str.TrimLeft();
- sql.SetFieldText("Contact",str);
- }
- else if(strInside.Find("详细信息:")>=0)
- {
- lpTag=reader.GetNextTag();
- lpTag=reader.GetNextTag();
- strInside=reader.GetInnerHtml(lpTag);
- strInside.TrimRight();
- str.TrimLeft();
- sql.SetFieldText("Abstract",strInside);
- }
- else if(strInside.Find("联系电话:")>=0)
- {
- strInside.TrimRight();
- if(strInside.GetLength()<=10) continue;
- at1=strInside.Find("-");
- at2=strInside.Find("-",at1+1);
- str=strInside.Mid(at1+1,at2-at1-1);
- if(!str.IsEmpty()) sql.SetFieldText("Area",str);
- str=strInside.Mid(at2+1,strInside.GetLength()-at2);
- if(!str.IsEmpty()) sql.SetFieldText("Phone",str);
- }
- else if(strInside.Find("传 真:")>=0)
- {
- strInside.TrimRight();
- if(strInside.Find("-")>=0)
- {
- at1=strInside.Find("-");
- at2=strInside.Find("-",at1+1);
- str=strInside.Mid(at2+1,strInside.GetLength()-at2);
- str.TrimLeft();
- sql.SetFieldText("Fax",str);
- }
- }
- else if(strInside.Find("移动电话:")>=0)
- {
- strInside.TrimRight();
- at1=strInside.Find(":");
- str=strInside.Mid(at1+2,strInside.GetLength()-at1);
- str.TrimLeft();
- at1=str.Find(">");
- str=str.Mid(at1+1,str.GetLength()-at1);
- if(str.IsEmpty()) continue;
- sql.SetFieldText("Mobile",str);
- }
- else if(strInside.Find("电子邮箱:")>=0)
- {
- strInside.TrimRight();
- int ii=strInside.GetLength();
- if(strInside.GetLength()<=10) continue;
- at1=strInside.Find(":");
- str=strInside.Mid(at1+1,strInside.GetLength()-at1);
- str.TrimLeft();
- sql.SetFieldText("Email",str);
- }
- else if(strInside.Find("公司地址:")>=0)
- {
- strInside.TrimRight();
- at1=strInside.Find(":");
- str=strInside.Mid(at1+2,strInside.GetLength()-at1);
- str.TrimLeft();
- sql.SetFieldText("Address",str);
- }
- else if(strInside.Find("公司网址:")>=0)
- {
- strInside.TrimRight();
- if(strInside.Find("www")<0&&strInside.Find("http")<0) continue;
- lpTag=reader.GetNextTag();
- strValue=reader.GetTagAttribute(lpTag,"href");
- sql.SetFieldText("Homepage",strValue);
- }
- }
- CString strSQL=sql.GetSQL();
- if(m_pDB->Execute(strSQL)<1)
- {
- MessageBox(m_pDB->m_strError+"\r\n"+strSQL);
- return;
- }
- sql.DeleteContents();
- m_qdbgrid.Retrieve(m_pDB);
- m_grid.Invalidate();
- }
- }