繁体中文
设为首页
加入收藏
当前位置:.Net技术首页 >> Asp.Net开发 >> load and filter the webpage code from remote server

load and filter the webpage code from remote server

2007-09-15 08:00:00  作者:  来源:互联网  浏览次数:0  文字大小:【】【】【
简介:Our company's site uses many iframes that load pages from a remote server, which is not a good experience for visitors. So I wrote a program that fetches the remote web pages and saves the HTML code to a local file. But the pages...

Our company's site uses many iframes that load pages from a remote server, which is not a good experience for visitors. So I wrote a program that fetches the remote web pages and saves the HTML code to a local file. However, the pages usually use relative paths to load images and to link to other resources, so I also wrote a conversion function that filters the code fetched from the remote server before it is saved to disk.

The source code is listed below:

//从远端URI获取网页写入本地

//替换SRC,HREF,IMG相对地址为绝对地址

//返回执行结果:

//-1:网络问题

//-2:写文件的时候出错

public static short getFileFromRemote(string uri,string filePath)

{

string uriBase=uri.Substring(0,uri.LastIndexOf("/")+1);

string tempCode="";

//读取源端的页面

try

{

WebRequest request = WebRequest.Create(uri);

//请求服务

WebResponse response = request.GetResponse();

//返回信息

Stream resStream = response.GetResponseStream();

StreamReader sr = new StreamReader(resStream, Encoding.GetEncoding("GB2312"));

tempCode= sr.ReadToEnd();

resStream.Close();

sr.Close();

}

catch

{

return -1;

}

//替换相对路径为绝对路径

tempCode=convertPath(tempCode,uriBase);

//写文件

try{

if(File.Exists(filePath)){

File.Delete(filePath);

}

StreamWriter sw = new StreamWriter(filePath,true,Encoding.GetEncoding("GB2312"));

sw.Write(tempCode);

sw.Flush();

sw.Close();

}

catch{

return -2;

}

return 1;

}

//将HTML页面中的连接和图片的相对路径转换为绝对路径

public static string convertPath(string tempCode,string uriBase)

{

//最小化源串

tempCode=tempCode.ToLower();

//设定要转换的标签

string[] targs=new string[] {"href","src","background"};

//循环转换

for(int i=0;i

{

string newCode=""; //目标串

Regex reg=new Regex(targs[i]);//将标签转化为正则型变量

MatchCollection mc=reg.Matches(tempCode);//找出所有的匹配的标签的位置

int curPos=0;//设定指针

//循环替换找到的标签

for(int j=0;j

{

int leftPos=mc[j].Index;//目标标签的左位置

int rightPos=mc[j].Index+targs[i].Length;//目标标签的右位置

//将目标标签右边的空格算入目标标签

while(tempCode.Substring(rightPos,1)==" ")

rightPos++;

//将目标标签右边的等号算入目标标签,如果没有找到等号开始下一个循环

if(tempCode.Substring(rightPos++,1)=="=")

{

//将目标标签右边的空格算入目标标签

while(tempCode.Substring(rightPos,1)==" ")

{

rightPos++;

}

//将目标标签右边的双引号和单引号算入目标标签

if(tempCode.Substring(rightPos,1)=="\"" || tempCode.Substring(rightPos,1)=="\'")

{

rightPos++;

}

//将目标标签右边的空格算入目标标签

while(tempCode.Substring(rightPos,1)==" ")

rightPos++;

//如果已经是相对路径,不做操作

if(tempCode.Substring(rightPos,7)!="http://")

{

newCode+=tempCode.Substring(curPos,rightPos-curPos)+uriBase;

curPos=rightPos;

}

}

//如果是最后一个循环,将源串的尾巴加入新串

if(j==mc.Count-1)

newCode+=tempCode.Substring(curPos);

}

//让新串成为源串,对新串进行下一轮替换

tempCode=newCode;

}

return tempCode;

}

//end of convertPath()

责任编辑:admin
相关文章