#include <malloc.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "network.h"
#include "convert.h"
#include "auth.h"
struct problemlist
{
char *title;
char *id;
struct problemlist *next;
} ;
/* Output file (opened by main from the -f argument); matching page titles are written here. */
FILE *debug;
/* clock() value sampled at the very beginning of main. */
clock_t start;
/* threadc[i] holds the index i that is handed to worker i as its argument. */
int threadc[1200];
/* threads[i] is worker i's private buffer for the title it is currently processing. */
/* Note: only 1050 rows, fewer than threadc/threadpool; main starts 1010 workers. */
char threads[1050][3000];
/* Thread handles filled in by threadini(). */
pthread_t threadpool[1200];
/* Number of workers still running: counted up in threadini(), counted down in threadfunc(). */
int threadnumber=0;
/* Initialised and destroyed in main; not otherwise used in the code shown here. */
pthread_mutex_t cs;
/* Protects threadnumber. */
pthread_mutex_t tcs;
/* Serialises writes to the debug output file. */
pthread_mutex_t lock;
/* Protects the shared work list head. */
pthread_mutex_t hcs;
/* Search string taken from the -s argument. */
char matchstring[1000];
/* strlen(matchstring), set by kmpini(). */
int matchlength;
/* KMP failure table for matchstring, allocated by kmpini(). */
int *nextm;
/* Head of the list of pages that still have to be processed. */
struct problemlist *head;
/* Start flag: workers idle until main sets it to 1. */
int action=0;
/* Extracts -u/-p/-f/-s values from the command line; returns 0 only if all four are present. */
int parsearg(int argc,char *argv[],char *user,char *passwd,char *filen,char *target);
/* Scans the body read from h for matchstring; returns 1 on a match, 0 otherwise. */
int kmp(HTTP h);
/* Worker thread body: repeatedly takes a title off the shared list and processes it. */
void *threadfunc(void *i);
/* Starts count detached worker threads. */
int threadini(int count);
/* Fetches the raw text of one page and records its title if it contains matchstring. */
int proceedchild(char *title);
/* Queries the full page list and returns it as a linked list (NULL on failure). */
struct problemlist *allpagequery();
/* Builds the KMP failure table (nextm, matchlength) for matchstring. */
int kmpini();
/*
 * Program entry point.
 *
 * Steps: parse the command line, open the output file, log in, build the KMP
 * table for the search string, download the page list, start the worker pool
 * and wait until the list has been drained and the workers have finished (or
 * roughly 20 seconds have passed after the list became empty).
 *
 * Returns 0 on success and -1 on any set-up failure.
 *
 * NOTE(review): the elapsed time printed at the end is based on clock(),
 * which reports processor time rather than wall-clock time.
 */
int main(int argc,char *argv[])
{
    int count=0;
    int alive=0;
    int pending=0;
    char username[500];
    char passwd[300];
    char filen[100];
    clock_t end;

    start=clock();
    if(parsearg(argc,argv,username,passwd,filen,matchstring))
    {
        printf("useage:%s -u username -p password -f filenam -s string\n",argv[0]);
        return -1;
    }
    debug=fopen(filen,"w+");
    if(!debug)
    {
        printf("can't create file.\n");
        return -1;
    }
    pthread_mutex_init(&cs,NULL);
    pthread_mutex_init(&lock,NULL);
    pthread_mutex_init(&tcs,NULL);
    pthread_mutex_init(&hcs,NULL);
    count=login(username,passwd);
    if(count)
    {
        printf("login error.\n");
        fclose(debug);
        return -1;
    }
    if(kmpini())
    {
        printf("kmp init error.\n");
        fclose(debug);
        return -1;
    }
    head=allpagequery();
    if(head==NULL)
    {
        printf("Query page error.\n");
        fclose(debug);
        return -1;
    }
    printf("query ok\n");
    fflush(stdout);
    threadini(1010);
    /* Workers are still parked on the start flag, so the counter is stable here. */
    printf("Create %d threads to go through the problem list.\n",threadnumber);
    fflush(stdout);
    if(threadnumber<=0)
    {
        /* Without any worker nobody would ever empty the list: bail out
           instead of waiting forever below. */
        printf("can't create any worker thread.\n");
        fclose(debug);
        return -1;
    }
    action=1;

    /* Wait until the workers have taken every entry off the shared list.
       The list head is shared with the workers, so sample it under its mutex. */
    for(;;)
    {
        pthread_mutex_lock(&hcs);
        pending=(head!=NULL);
        pthread_mutex_unlock(&hcs);
        if(!pending) break;
        sleep(1);
    }

    /* Give the workers up to 20 seconds to finish the page they are on. */
    for(count=0;;count++)
    {
        pthread_mutex_lock(&tcs);
        alive=threadnumber;
        pthread_mutex_unlock(&tcs);
        if(alive<=0||count>=20) break;
        printf("waiting for all threads to exit. Current thread number: %d\n",alive);
        fflush(stdout);
        sleep(1);
    }

    if(alive>0)
    {
        /* Stragglers may still write to the file and use the mutexes.
           Only flush what has been written so far; process exit releases
           the rest. */
        pthread_mutex_lock(&lock);
        fflush(debug);
        pthread_mutex_unlock(&lock);
    }
    else
    {
        fclose(debug);
    }
    end=clock();
    printf("----------------end time:%f-------------------\n",(double)(end-start)/CLOCKS_PER_SEC);
    fflush(stdout);
    if(alive<=0)
    {
        pthread_mutex_destroy(&cs);
        pthread_mutex_destroy(&tcs);
        pthread_mutex_destroy(&lock);
        pthread_mutex_destroy(&hcs);
    }
    return 0;
}
/*
 * Extract the "-u user -p password -f file -s string" options from argv.
 *
 * An option is only recognised when it is followed by a value that does not
 * itself start with '-'.  Any "-x value" pair consumes both arguments, even
 * when x is not one of the known letters.
 *
 * user, passwd, filen and target receive the values.  They are assumed to be
 * the buffers main() passes in (500, 300, 100 and 1000 bytes); a value that
 * would not fit, including its terminating NUL, makes the call fail instead
 * of overflowing the buffer.
 *
 * Returns 0 when all four options were supplied, 1 otherwise.
 */
int parsearg(int argc,char *argv[],char *user,char *passwd,char *filen,char *target)
{
    /* Capacities of the destination buffers declared by the caller. */
    enum { USER_CAP=500, PASSWD_CAP=300, FILEN_CAP=100, TARGET_CAP=1000 };
    int hasu=0,hasp=0,hasf=0,hast=0;
    int i;

    for(i=1;i<argc;i++)
    {
        const char *opt=argv[i];
        const char *val=(i+1<argc)?argv[i+1]:NULL;
        char *dst=NULL;
        size_t cap=0;
        int *seen=NULL;

        if(opt[0]!='-'||val==NULL||val[0]=='-')
        {
            continue;
        }
        switch(opt[1])
        {
        case 'u':
            dst=user;   cap=USER_CAP;   seen=&hasu;
            break;
        case 'f':
            dst=filen;  cap=FILEN_CAP;  seen=&hasf;
            break;
        case 'p':
            dst=passwd; cap=PASSWD_CAP; seen=&hasp;
            break;
        case 's':
            dst=target; cap=TARGET_CAP; seen=&hast;
            break;
        default:
            break;
        }
        if(dst)
        {
            size_t len=strlen(val);
            if(len>=cap)
            {
                return 1;
            }
            memcpy(dst,val,len+1);
            *seen=1;
        }
        i++; /* skip the value that belongs to this option */
    }
    return (hasu&&hasf&&hasp&&hast)?0:1;
}
/*
 * Start up to `count` detached worker threads running threadfunc().
 *
 * Worker i receives a pointer to threadc[i] (which holds i) and owns the
 * buffer threads[i].  threadnumber is reset and then set to the number of
 * threads that were actually created.  The request is clamped to the
 * smallest of the per-thread arrays so a large count cannot write past them.
 *
 * Always returns 0; callers read threadnumber to learn how many workers run.
 */
int threadini(int count)
{
    pthread_attr_t attr;
    int limit=(int)(sizeof threads/sizeof threads[0]);
    int i;

    if(limit>(int)(sizeof threadc/sizeof threadc[0]))
    {
        limit=(int)(sizeof threadc/sizeof threadc[0]);
    }
    if(limit>(int)(sizeof threadpool/sizeof threadpool[0]))
    {
        limit=(int)(sizeof threadpool/sizeof threadpool[0]);
    }
    if(count>limit)
    {
        count=limit;
    }

    threadnumber=0;
    if(pthread_attr_init(&attr))
    {
        /* No usable attribute object: start nothing. */
        return 0;
    }
    pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_DETACHED);
    for(i=0;i<count;i++)
    {
        threadc[i]=i;
        threads[i][0]=0;
        if(pthread_create(&threadpool[i],&attr,threadfunc,(void *)(threadc+i))==0)
        {
            threadnumber++;
        }
    }
    pthread_attr_destroy(&attr);
    return 0;
}
void *threadfunc(void *c)
{
int i=*(int *)c;
int exit=0;
while(!action) sleep(1);
while(1)
{
pthread_mutex_lock(&hcs);
if(head)
{
strcpy(threads[i],head->title);
head=head->next;
}
else exit=1;
pthread_mutex_unlock(&hcs);
if(exit) break;
else
{
proceedchild(threads[i]);
}
}
pthread_mutex_lock(&tcs);
threadnumber--;
pthread_mutex_unlock(&tcs);
return NULL;
}
/*
 * Fetch the raw text of the page named `title` and, if it contains the
 * global search string, append "#[[title]] " to the output file.
 *
 * Returns 0 when the page was fetched and scanned (match or not), -1 when
 * title is NULL or the response header could not be skipped, and -5 when the
 * request itself failed.  The connection is closed on every path after it
 * has been opened.
 */
int proceedchild(char *title)
{
    HTTP f;
    char c[3000],tt[5000];
    int rc=0;

    if(!title)
    {
        return -1;
    }
    strcpy(c,"/w/index.php?action=raw&title=");
    /* NOTE(review): the last argument (500) is presumably the output limit
       of URLEncode; confirm against convert.h. */
    URLEncode(title,strlen(title),tt,500);
    strcat(c,tt);
    f=hopen();
    if(get(c,0,f))
    {
        rc=-5;
    }
    else if(skipresponseheader(f))
    {
        rc=-1;
    }
    else if(kmp(f))
    {
        /* The output file is shared by all workers. */
        pthread_mutex_lock(&lock);
        fprintf(debug,"#[[%s]] \r\n",title);
        fflush(debug);
        pthread_mutex_unlock(&lock);
    }
    hclose(f);
    return rc;
}
/* Release every node of a page list together with its title and id strings. */
static void freeproblemlist(struct problemlist *list)
{
    while(list)
    {
        struct problemlist *following=list->next;
        free(list->title);
        free(list->id);
        free(list);
        list=following;
    }
}

/*
 * Download the list of all pages in namespace 0, 500 entries per request.
 *
 * Each response is scanned tag by tag: a "continue" tag supplies the
 * apcontinue value for the next request, and every "p" tag contributes one
 * list node holding copies of its pageid and title attributes.  Requests are
 * repeated until a response carries no "continue" tag.
 *
 * Returns the head of a newly allocated list in response order.  The caller
 * owns the nodes and their strings.  Returns NULL when a request fails, when
 * memory runs out (any partial list is freed first), or when no page was
 * found.
 */
struct problemlist *allpagequery()
{
    static const char url[]="/w/api.php?action=query&format=xml&list=allpages&apnamespace=0&aplimit=500";
    HTTP f;
    char line[2000]={0},id[400]={0},title[400]={0},sroffset[1000]={0},offseto[1000]={0};
    /* Large enough for the base URL, "&apcontinue=" and a full sroffset. */
    char snd[sizeof url+sizeof sroffset+16]={0};
    int next=0;
    struct problemlist *first=NULL,*last=NULL,*temp;
    char *ctm[]={"apcontinue"};
    char *ctv[1];
    char *idm[]={"pageid","title"};
    char *idv[2];

    ctv[0]=offseto;
    idv[0]=id;
    idv[1]=title;
    do
    {
        if(next)
        {
            snprintf(snd,sizeof snd,"%s&apcontinue=%s",url,sroffset);
        }
        else
        {
            snprintf(snd,sizeof snd,"%s",url);
        }
        f=hopen();
        if(get(snd,1,f))
        {
            hclose(f);
            freeproblemlist(first);
            return NULL;
        }
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next&&!strcmp(line,"continue"))
            {
                xmlparsearg(f,1,ctm,ctv);
                URLEncode(offseto,strlen(offseto),sroffset,990);
                next=1;
            }
            if(!strcmp(line,"p"))
            {
                xmlparsearg(f,2,idm,idv);
                temp=malloc(sizeof *temp);
                if(temp)
                {
                    /* Same allocation sizes as before (length plus a few spare bytes). */
                    temp->title=malloc(strlen(title)+5);
                    temp->id=malloc(strlen(id)+5);
                    temp->next=NULL;
                }
                if(!temp||!temp->title||!temp->id)
                {
                    if(temp)
                    {
                        free(temp->title);
                        free(temp->id);
                        free(temp);
                    }
                    freeproblemlist(first);
                    hclose(f);
                    return NULL;
                }
                strcpy(temp->title,title);
                strcpy(temp->id,id);
                if(first)
                {
                    last->next=temp;
                }
                else
                {
                    first=temp;
                }
                last=temp;
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return first;
}
/*
 * Prepare the KMP search state for the global matchstring.
 *
 * Sets matchlength to strlen(matchstring) and points nextm at a newly
 * allocated failure table: nextm[0] is -1 and nextm[i] is the pattern
 * position to fall back to after a mismatch at position i.  At least one
 * entry is always allocated so that an empty pattern does not write past a
 * zero-sized block.
 *
 * Returns 0 on success, -1 if the table could not be allocated (nextm is
 * left unchanged in that case).
 */
int kmpini()
{
    int i=0,j=-1;
    int *table;

    matchlength=(int)strlen(matchstring);
    table=malloc((size_t)(matchlength>0?matchlength:1)*sizeof *table);
    if(!table)
    {
        return -1;
    }
    table[0]=-1;
    while(i<matchlength-1)
    {
        if(j==-1)
        {
            /* Fell off the front of the pattern: restart from position 0. */
            j=0;
            i++;
            table[i]=0;
        }
        else if(matchstring[i]==matchstring[j])
        {
            i++;
            j++;
            table[i]=j;
        }
        else
        {
            j=table[j];
        }
    }
    nextm=table;
    return 0;
}
/*
 * Search the data read from h for the global matchstring, using the failure
 * table nextm built by kmpini().
 *
 * Characters are pulled one at a time with hgetc().  Scanning stops as soon
 * as heof(h) reports the end of the input or the whole pattern has been
 * matched.  Returns 1 if the pattern was matched completely, 0 otherwise.
 */
int kmp(HTTP h)
{
    int matched=0;      /* number of pattern characters matched so far */
    char cur=hgetc(h);  /* input character currently being compared */

    for(;;)
    {
        if(heof(h)||matched>=matchlength)
        {
            break;
        }
        if(matched==-1)
        {
            /* Even the first pattern character failed: move to the next input character. */
            cur=hgetc(h);
            matched=0;
        }
        else if(cur!=matchstring[matched])
        {
            matched=nextm[matched];
        }
        else
        {
            cur=hgetc(h);
            matched++;
        }
    }
    return (matched==matchlength)?1:0;
}