#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <process.h>
#include <windows.h>
#include <time.h>
#include "network.h"
#include "convert.h"
#include "auth.h"
#include "zhconverter.h"
struct problemlist
{
char *title;
char *id;
struct problemlist *next;
};
/*
 * Argument bundle handed from pagecheck() to smartedit() for one edit.
 */
struct neditargv
{
/* Not set by pagecheck() and not read by smartedit() in this file. */
char *title;
/* Buffer holding the POST body fragment; pagecheck() starts it with "&text=". */
HTTP newtext;
/* Page id string, substituted for "pageid=" in the edit URL. */
char *id;
/* Revision timestamp string, substituted for "basetimestamp=" in the edit URL. */
char *time;
};
/* Per-thread index slots; threadini() stores i in threadc[i] and passes its address to the thread. */
int threadc[1024];
/* NOTE(review): threads[] and threadpool[] are not referenced anywhere in the visible source. */
char *threads[1024];
HANDLE threadpool[1024];
/* cs: held in smartedit() while reading `token` / clearing `hastoken`. */
CRITICAL_SECTION cs;
/* tcs: held while reading or decrementing `threadnumber`. */
CRITICAL_SECTION tcs;
/* hcs: held while inspecting or popping the work list `pbl`. */
CRITICAL_SECTION hcs;
/* Number of worker threads counted as started by threadini() and not yet finished. */
int threadnumber=0;
/* Start gate: workers spin in threadfunc() until main() sets this to 1. */
int action=0;
/* NOTE(review): hl is not referenced anywhere in the visible source. */
struct hashlist *hl=NULL;
/* Head of the work list built by query() and drained by threadfunc(). */
struct problemlist *pbl=NULL;
/* Result of G2U() on the redirect template text (set in main()); appended to page text in pagecheck(). */
char *rdtemp_u;
/* Command-line values filled by parsearg(): -u, -p, -n and -F respectively. */
char *username=NULL;
char *passwd=NULL;
char *ns=NULL;
char *convertion_table=NULL;
/* Worker thread count (-T); parsearg() resets it to 256 when outside 1..1024. */
int maxthread=256;
/* Reset to 0 by parsearg(); not otherwise used in the visible source. */
int doallpage=0;
/*
 * Parse the command line into the file-scope option variables.
 *
 * Recognised options, each of which must be followed by a value that does
 * not itself start with '-':
 *   -u <username>   -p <passwd>   -F <conversion table file>   (required)
 *   -n <namespace>  -T <thread count>                          (optional)
 *
 * An option without a usable value, or an unknown option letter, is
 * silently skipped.  -T values outside 1..1024 fall back to 256.
 * When -n is absent the namespace defaults to "0".
 *
 * Returns 0 when -u, -p and -F were all supplied, -1 otherwise.
 */
static int parsearg(int argc,char *argv[])
{
    int got_user=0,got_pass=0,got_table=0;
    int idx;

    doallpage=0;
    for(idx=1;idx<argc;idx++)
    {
        char *opt=argv[idx];
        char *val;

        if(opt[0]!='-') continue;
        /* argv[argc] is NULL, so looking one slot ahead is always valid. */
        val=argv[idx+1];
        if(!val||val[0]=='-') continue;

        if(opt[1]=='u')
        {
            username=val;
            got_user=1;
        }
        else if(opt[1]=='p')
        {
            passwd=val;
            got_pass=1;
        }
        else if(opt[1]=='n')
        {
            ns=val;
        }
        else if(opt[1]=='T')
        {
            maxthread=atoi(val);
            if(maxthread<1||maxthread>1024) maxthread=256;
        }
        else if(opt[1]=='F')
        {
            convertion_table=val;
            got_table=1;
        }
        else
        {
            /* Unknown option letter: its value is not consumed. */
            continue;
        }
        /* Step over the value that was just consumed. */
        idx++;
    }
    if(ns==NULL) ns="0";
    return (got_user&&got_pass&&got_table)?0:-1;
}
/*
 * Post the prepared page text in p->newtext to the wiki edit API.
 *
 * p      - edit arguments; p->id and p->time go into the URL, p->newtext
 *          is rewound and sent as the POST body.  p->title is not used.
 * reason - edit summary; URL-encoded before being sent.
 *
 * The current edit token is appended to the trailing POST parameters on
 * every attempt.  If the response carries an <error> whose code is
 * "notoken" or "badtoken", the shared token is marked stale (hastoken=0)
 * and the request is retried, up to three attempts in total.
 *
 * Returns  0 when the response contained no <error> element,
 *         -1 when smartpost() returned 0,
 *         -2 when the status line does not contain " 200",
 *         -3 when the API response contained an <error> element,
 *         -4 when formatting the POST parameters failed.
 *
 * Fixes relative to the previous version:
 *  - a stray unconditional `return -1` after smartpost() made all response
 *    handling and the retry loop unreachable;
 *  - the response handle is now closed on every exit path;
 *  - erv[0] now points at err_type, so xmlparsearg() has a real buffer;
 *  - the stale-token check compares against the shared `token` instead of
 *    a buffer that was never written;
 *  - the attempt counter is incremented once per pass, not twice.
 */
static int smartedit(struct neditargv *p,char *reason)
{
    HTTP res;
    char line[2048],url[4096]={0};
    char reason_e[512];
    char aft[1024],statusline[128]={0};
    char err_type[128]={0};
    char *erm[]={"code"};
    char *erv[1];
    int find;
    int has_err=0,token_err=0;
    int retry=0;

    /* Same calling pattern as the other xmlparsearg() users in this file. */
    erv[0]=err_type;
    URLEncode(reason,strlen(reason),reason_e,510);
    sprintf(url,"https://zh.wikipedia.org/w/api.php?action=edit&pageid=%s&basetimestamp=%s",p->id,p->time);
    /* `find` remembers where the token starts so it can be replaced on retry. */
    find=sprintf(aft,"&summary=%s&bot=1&minor=1&nocreate=1&format=xml&token=",reason_e);
    if(find<0) return -4;
    do
    {
        res=hopen();
        retry++;
        /* Wait until a token is flagged as available. */
        while(!hastoken)
        {
            Sleep(100);
        }
        EnterCriticalSection(&cs);
        aft[find]=0;
        strcat(aft,token);
        LeaveCriticalSection(&cs);
        hrewind(p->newtext);
        if(!smartpost(url,p->newtext,aft,1,res))
        {
            hclose(res);
            return -1;
        }
        statusline[0]=0;
        hgets(statusline,127,res);
        if(!strstr(statusline," 200"))
        {
            hclose(res);
            return -2;
        }
        skipresponseheader(res);
        token_err=has_err=0;
        err_type[0]=0;
        while(!heof(res))
        {
            if(xmlparsetag(res,line)==XML_HAS_VALUE)
            {
                if(!strcmp(line,"error"))
                {
                    has_err=1;
                    xmlparsearg(res,1,erm,erv);
                    if((!strcmp(err_type,"notoken"))||(!strcmp(err_type,"badtoken")))
                    {
                        token_err=1;
                    }
                    break;
                }
            }
        }
        hclose(res);
        if(token_err)
        {
            EnterCriticalSection(&cs);
            /* Only invalidate if the shared token is still the one that was rejected. */
            if(!strcmp(aft+find,token)) hastoken=0;
            LeaveCriticalSection(&cs);
        }
    }while(token_err==1&&retry<3);
    if(has_err) return -3;
    else return 0;
}
/*
 * Stream the text of one revision out of `f`, decide whether the page
 * needs the redirect template, and if so submit the edit.
 *
 * pageid   - page id string forwarded to smartedit().
 * basetime - revision timestamp string forwarded to smartedit().
 * f        - response stream positioned at the revision text; characters
 *            are pulled with xmlpulltext() until it stops returning
 *            XML_TEXT_CONTINUE.
 *
 * Every character read is URL-encoded into a new POST body that starts
 * with "&text=".  While scanning:
 *   - a "{{" pair marks the page as already done and stops the scan;
 *   - a '#' followed later by "[[" marks the page as a candidate.
 *
 * Returns 1 when nothing was submitted (already done, or not a
 * candidate), 0 after smartedit() was called.  The result of smartedit()
 * is not inspected.
 */
static int pagecheck(char *pageid,char *basetime,HTTP f)
{
    enum { SEEN_NOTHING=0, SEEN_HASH=1, SEEN_LINK=2 };
    HTTP body;
    char cur=0,prev=0;
    char tmpl_enc[100];
    int has_template=0;
    int stage=SEEN_NOTHING;
    struct neditargv job;

    body=hopen();
    hputs("&text=",6,body);
    while(xmlpulltext(f,&cur)==XML_TEXT_CONTINUE)
    {
        smartURLEncode(cur,body);
        if(stage==SEEN_NOTHING&&cur=='#') stage=SEEN_HASH;
        if(prev=='{'&&cur=='{')
        {
            has_template=1;
            break;
        }
        if(stage==SEEN_HASH&&prev=='['&&cur=='[') stage=SEEN_LINK;
        prev=cur;
    }

    if(has_template||stage!=SEEN_LINK)
    {
        hclose(body);
        return 1;
    }

    /* Append the encoded template to the text collected so far. */
    URLEncode(rdtemp_u,strlen(rdtemp_u),tmpl_enc,98);
    hputs(tmpl_enc,strlen(tmpl_enc),body);
    job.id=pageid;
    job.newtext=body;
    job.time=basetime;
    smartedit(&job,"bot: add redirect template");
    hclose(body);
    return 0;
}
/*
 * Fetch the latest revision (content and timestamp) of the page(s) named
 * by `ids` and pass every wikitext revision to pagecheck().
 *
 * ids - value for the API "pageids" parameter; must not be NULL.
 *
 * The response is parsed with a small state machine:
 *   0: looking for <page> with a non-zero "pageid" attribute,
 *   1: looking for <rev> to pick up "timestamp",
 *   2: looking for <slot>; if its contentmodel/contentformat are
 *      "wikitext"/"text/x-wiki" the slot text is handed to pagecheck().
 *
 * Returns 0 on completion, -1 for a NULL `ids`, -2 when get() reports
 * failure, -3 when skipresponseheader() reports failure.
 *
 * The attribute buffers are zero-initialised and cleared before each
 * parse: the results of several xmlparsearg() calls are not checked, so
 * without this the comparisons below could read uninitialised bytes or
 * values left over from a previous element.
 */
static int proceedchild(char *ids)
{
    char url[4096];
    char buf[8192];
    char pageid[256]={0};
    char timestamp[256]={0};
    char contentmodel[64]={0},contentformat[64]={0};
    const char *ttm[]={"pageid"};
    const char *tmm[]={"timestamp"};
    const char *cmm[]={"contentmodel","contentformat"};
    char *ttv[1];
    char *tmv[1];
    char *cmv[2];
    int result;
    int status;
    HTTP h;

    ttv[0]=pageid;
    tmv[0]=timestamp;
    cmv[0]=contentmodel;
    cmv[1]=contentformat;
    if(!ids)
    {
        return -1;
    }
    sprintf(url,"https://zh.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&rvprop=content|timestamp&pageids=%s&rvslots=main",ids);
    h=hopen();
    if(get(url,1,h))
    {
        hclose(h);
        return -2;
    }
    if(skipresponseheader(h))
    {
        hclose(h);
        return -3;
    }
    status=0;
    while(!heof(h))
    {
        result=xmlparsetag(h,buf);
        switch(status)
        {
        case 0:
            if(result==XML_HAS_VALUE&&!strcmp(buf,"page"))
            {
                pageid[0]=0;
                if(xmlparsearg(h,1,ttm,ttv)==XML_HAS_VALUE)
                {
                    if(atoi(pageid)) status=1;
                }
            }
            break;
        case 1:
            if(result==XML_HAS_VALUE&&!strcmp(buf,"rev"))
            {
                timestamp[0]=0;
                xmlparsearg(h,1,tmm,tmv);
                status=2;
            }
            break;
        case 2:
            if(result==XML_HAS_VALUE&&!strcmp(buf,"slot"))
            {
                contentmodel[0]=0;
                contentformat[0]=0;
                xmlparsearg(h,2,cmm,cmv);
                if(!strcmp(contentmodel,"wikitext")&&!strcmp(contentformat,"text/x-wiki")) pagecheck(pageid,timestamp,h);
                status=0;
            }
            break;
        default:
            break;
        }
    }
    hclose(h);
    return 0;
}
/*
 * Decide whether two UTF-8 titles differ only by Chinese script variant.
 *
 * a, b - NUL-terminated UTF-8 strings.
 *
 * Both strings are decoded with utf8tounicode() and passed through
 * zhconverter(); they match when the converted sequences are identical
 * element for element.
 *
 * Returns 1 on such a match, 0 when the strings are byte-identical,
 * when decoding or conversion fails, or when the converted forms differ.
 *
 * Fixes relative to the previous version:
 *  - `b_len=utf8tounicode(...)<=0` assigned the comparison result, not the
 *    length, because of operator precedence; the assignment is removed.
 *  - equal non-positive lengths from zhconverter() used to skip the
 *    comparison loop and report a match; they are now rejected.
 *    (Assumes zhconverter() returns the output length, as its use as the
 *    loop bound implies - TODO confirm against zhconverter.h.)
 */
int judgeredirect(char *a,char *b)
{
    unsigned int a_uni[4096],b_uni[4096];
    unsigned int hant_a[4096],hant_b[4096];
    int a_len,b_len;
    int i;

    if(!strcmp(a,b)) return 0;
    if(utf8tounicode(a,a_uni)<=0) return 0;
    if(utf8tounicode(b,b_uni)<=0) return 0;
    a_len=zhconverter(a_uni,hant_a);
    b_len=zhconverter(b_uni,hant_b);
    if(a_len<=0||a_len!=b_len) return 0;
    for(i=0;i<b_len;i++)
    {
        if(hant_a[i]!=hant_b[i]) return 0;
    }
    return 1;
}
/*
 * Look up the title of the page whose id is p->id and, if that title and
 * p->title are script variants of each other (judgeredirect()), process
 * the page with proceedchild().
 *
 * Returns 0 when the lookup produced a title (whether or not the page
 * was then processed), -1 when the request or header skipping failed,
 * -2 when no title was found in the response.
 */
static int precheck(struct problemlist *p)
{
    char *attr_names[]={"title"};
    char *attr_values[1];
    char resolved[1024]={0};
    char tag[2048];
    char request[4096];
    HTTP resp;

    attr_values[0]=resolved;
    sprintf(request,"https://zh.wikipedia.org/w/api.php?action=query&format=xml&pageids=%s",p->id);

    resp=hopen();
    if(get(request,1,resp)||skipresponseheader(resp))
    {
        hclose(resp);
        return -1;
    }

    /* Stop at the first <page> element and read its title attribute. */
    for(;;)
    {
        if(heof(resp)) break;
        xmlparsetag(resp,tag);
        if(strcmp(tag,"page")==0)
        {
            xmlparsearg(resp,1,attr_names,attr_values);
            break;
        }
    }
    hclose(resp);

    if(resolved[0]==0)
    {
        return -2;
    }
    if(judgeredirect(resolved,p->title))
    {
        proceedchild(p->id);
    }
    return 0;
}
void threadfunc(void *c)
{
int i=*(int *)c;
int ext=0;
int result=0;
struct problemlist *p;
char *title,*id;
while(!action) Sleep(1);
while(1)
{
EnterCriticalSection(&hcs);
if(p=pbl)
{
title=pbl->title;
id=pbl->id;
pbl=pbl->next;
}
else ext=1;
LeaveCriticalSection(&hcs);
if(ext) break;
else
{
result=precheck(p);
if(id) free(id);
if(title) free(title);
free(p);
}
}
if(result) exit(-1);
EnterCriticalSection(&tcs);
threadnumber--;
LeaveCriticalSection(&tcs);
return ;
}
/*
 * Start up to `count` worker threads running threadfunc().
 *
 * count - requested number of workers; clamped to the capacity of
 *         threadc[] so the per-thread slot index can never run past it.
 *
 * Sets `threadnumber` to the number of threads that were actually
 * created.  Always returns 0.
 *
 * Fix relative to the previous version: the return value of
 * _beginthread() is a uintptr_t and signals failure with -1L.  It was
 * stored in an int and tested with `>0`, which truncates on 64-bit
 * builds.  It is now kept at full width and compared against the
 * documented failure value (0 is also treated as failure, as before).
 */
int threadini(int count)
{
    int i;
    int capacity=(int)(sizeof threadc/sizeof threadc[0]);
    uintptr_t handle;

    threadnumber=0;
    if(count>capacity) count=capacity;
    for(i=0;i<count;i++)
    {
        threadc[i]=i;
        handle=_beginthread(threadfunc,0,(void *)(threadc+i));
        if(handle!=0&&handle!=(uintptr_t)-1L) threadnumber++;
    }
    return 0;
}
/*
 * Enumerate redirects in namespace `ns` through list=allredirects and
 * push one problemlist node per <r> element onto the global list `pbl`.
 *
 * ns - namespace number as a string, inserted into "arnamespace=".
 *
 * Follows the API continuation: when a <continue> element is seen, its
 * "arcontinue" attribute is URL-encoded and appended to the next request.
 *
 * Returns 0 when all batches were read, 1 when a request failed or a
 * list node could not be allocated.  Nodes linked before a failure stay
 * on `pbl`.
 *
 * Fix relative to the previous version: the three malloc() results were
 * dereferenced without a check; allocation failure now releases the
 * partial node, closes the response and returns 1.  The unused locals
 * `status` and `count` were removed.
 */
int query(char *ns)
{
    HTTP f;
    char line[2048]={0},url[4096]={0},snd[4096]={0},id[512]={0},title[512]={0},sroffset[2048]={0},offseto[512]={0};
    int next=0;
    struct problemlist *temp=NULL;
    char *ctm[]={"arcontinue"};
    char *ctv[1];
    char *idm[]={"fromid","title"};
    char *idv[2];

    ctv[0]=offseto;
    idv[0]=id;
    idv[1]=title;
    sprintf(url,"https://zh.wikipedia.org/w/api.php?action=query&format=xml&list=allredirects&arlimit=5000&arnamespace=%s&arprop=title|ids",ns);
    do
    {
        strcpy(snd,url);
        if(next)
        {
            strcat(snd,"&arcontinue=");
            strcat(snd,sroffset);
        }
        f=hopen();
        if(get(snd,1,f))
        {
            hclose(f);
            return 1;
        }
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next)
            {
                if(!strcmp(line,"continue"))
                {
                    xmlparsearg(f,1,ctm,ctv);
                    URLEncode(offseto,strlen(offseto),sroffset,990);
                    next=1;
                }
            }
            if(!strcmp(line,"r"))
            {
                xmlparsearg(f,2,idm,idv);
                temp=malloc(sizeof *temp);
                if(temp)
                {
                    temp->title=malloc(strlen(title)+1);
                    temp->id=malloc(strlen(id)+1);
                }
                if(!temp||!temp->title||!temp->id)
                {
                    if(temp)
                    {
                        free(temp->title);
                        free(temp->id);
                        free(temp);
                    }
                    hclose(f);
                    return 1;
                }
                strcpy(temp->title,title);
                strcpy(temp->id,id);
                /* Push onto the head of the work list. */
                temp->next=pbl;
                pbl=temp;
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return 0;
}
#ifdef DEBUG_MODE
/*
 * Debug entry point (built only when DEBUG_MODE is defined): loads a
 * conversion table from a fixed local path and prints the result of
 * judgeredirect() on one Simplified/Traditional character pair.
 *
 * Fix relative to the previous version: the backslashes in the Windows
 * path were not escaped, so "\A", "\h", "\w", "\i" and "\Z" were invalid
 * escape sequences rather than path separators.
 */
int main(void)
{
    printf("%d\n",converterini("D:\\Apache24\\htdocs\\w\\includes\\ZhConversion.php"));
    /* Another candidate test pair: "紀念巡迴演唱會" vs "纪念巡回演唱会". */
    printf("res=%d\n",judgeredirect(G2U("树"),G2U("樹")));
    return 0;
}
#else
/*
 * Program entry point.
 *
 * Sequence: parse options, load the conversion table, log in, start the
 * token manager thread, build the work list with query(), start the
 * workers, wait for the list to drain, then wait up to 60 seconds for
 * the workers to finish.
 *
 * Returns 0 on normal completion, -1 for bad arguments, -2 when the
 * conversion table cannot be loaded, -3 when login fails, -4 when the
 * query produced no pages.
 */
int main(int argc,char *argv[])
{
    int count;

    if(parsearg(argc,argv)!=0)
    {
        printf("usage: -u username -p passwd -F conversion_table_file [-T concurrency -n namespace]\n");
        return -1;
    }
    if(converterini(convertion_table)!=0)
    {
        printf("Load conversion table error!\n");
        return -2;
    }
    rdtemp_u=G2U("{{简繁重定向}}");

    InitializeCriticalSection(&cs);
    InitializeCriticalSection(&tcs);
    InitializeCriticalSection(&hcs);
    buckini(20);

    if(login(username,passwd))
    {
        printf("Login error!\n");
        return -3;
    }
    hastoken=0;
    printf("Login complete.\n");
    fflush(stdout);
    _beginthread(tokenmanage,0,0);

    query(ns);
    if(!pbl)
    {
        printf("No page!\n");
        return -4;
    }
    printf("Query complete.\n");

    /* Workers are created while the gate is closed, then released together. */
    action=0;
    threadini(maxthread);
    action=1;

    /* Poll once per second until the work list has been drained. */
    for(;;)
    {
        int pending;

        EnterCriticalSection(&hcs);
        pending=(pbl!=NULL);
        LeaveCriticalSection(&hcs);
        if(!pending) break;
        Sleep(1000);
    }

    /* Give the workers at most 60 one-second rounds to finish. */
    for(count=1;count<=60;count++)
    {
        int running;

        EnterCriticalSection(&tcs);
        running=(threadnumber>0);
        LeaveCriticalSection(&tcs);
        if(!running) break;
        printf("Waiting for all threads to exit. Current thread number: %d\n",threadnumber);
        fflush(stdout);
        Sleep(1000);
    }

    if(threadnumber!=0)
    {
        printf("%d threads left.\n",threadnumber);
    }
    else
    {
        DeleteCriticalSection(&tcs);
        DeleteCriticalSection(&hcs);
        hastoken=-1;
        buckdestroy();
    }
    printf("---------------Ok done.---------------\n");
    fflush(stdout);
    system("PAUSE");
    DeleteCriticalSection(&cs);
    return 0;
}
#endif