PHP Classes

File: php-mail-extractor.php

Recommend this page to a friend!
  Classes of Vidar Vestnes   PHP Mail Extractor Script   php-mail-extractor.php   Download  
File: php-mail-extractor.php
Role: Application script
Content type: text/plain
Description: Extracts email addresses from your hard drive and saves them to a file.
Class: PHP Mail Extractor Script
Find and extract email addresses from files
Author: By
Last change:
Date: 11 years ago
Size: 3,976 bytes
 

Contents

Class file image Download
<?php
/**
 *
 * PHP Mail Extractor v1.0
 *
 * Scan your disk and store all found email-addresses to a file.
 *
 * Recursively scans all subfolders of the given start dir.
 *
 * Example of use from command prompt:
 * php php-mail-extractor.php "c:/myfolder" "c:/mail-list.txt"
 *
 *
 * By Vidar Vestnes 2013
 *
 * Require: PHP v4.4
 *
 */

/**
 * PHP 4 compatibility shim for file_put_contents().
 *
 * Only declared when the runtime does not already provide the function.
 * The FILE_APPEND constant is defined alongside it when missing, because
 * the script below passes that flag on every write.
 *
 * @param string $filename File to write to.
 * @param string $data     Data to write.
 * @param int    $flags    Bit mask; only FILE_APPEND is honoured. Without it
 *                         the file is truncated first, like the native function.
 * @return int|false Number of bytes written, or false if the file could not be opened.
 */
if (!function_exists('file_put_contents'))
{
    if (!defined('FILE_APPEND'))
    {
        // Same value the native constant has on runtimes that ship it.
        define('FILE_APPEND', 8);
    }

    function file_put_contents($filename, $data, $flags = 0)
    {
        // Append only when asked to; otherwise start from an empty file.
        $mode = ($flags & FILE_APPEND) ? 'ab' : 'wb';

        $handle = @fopen($filename, $mode);
        if (!$handle)
        {
            return false;
        }

        $bytes = fwrite($handle, $data);
        fclose($handle);

        return $bytes;
    }
}

class PhpMailExtractor
{
    /**
     * Scan a directory tree and append every email address found to a file.
     *
     * Recursively scans all subfolders of the given start dir. Only regular
     * files smaller than 10000 bytes are read. Each new address is lower-cased,
     * echoed to standard output and appended to $targetFile on its own line.
     *
     * Example of use from command prompt:
     * php php-mail-extractor.php "c:/myfolder" "c:/mail-list.txt"
     *
     * If an address cannot be written to $targetFile, an error message is
     * printed and the script is terminated with exit.
     *
     * @param string  $dir             Directory where to start the scan.
     * @param string  $targetFile      Filename where to store the result.
     * @param boolean $flushTargetFile When true, delete an existing target file before scanning.
     * @param array   $ignoreEmails    Passed by reference. Keys are lower-cased addresses to
     *                                 skip; every address written is added as a key, so the
     *                                 same array also prevents duplicates across the recursion.
     */
    function extract($dir, $targetFile, $flushTargetFile, &$ignoreEmails)
    {
        if ($flushTargetFile && is_file($targetFile))
        {
            @unlink($targetFile);
        }

        // The final label may be up to 63 letters long. A cap of 3 silently
        // truncated addresses such as "name@host.info" to "name@host.inf".
        $regex = '/[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,63})/i';

        $dh = @opendir($dir);
        if ($dh === false)
        {
            // Unreadable or missing directory: skip it. Passing false on to
            // readdir() raises a TypeError on PHP 8, and on older versions
            // yields a value that never equals false, so the loop never ends.
            return;
        }

        $entries = array();
        while (false !== ($filename = readdir($dh)))
        {
            if ($filename !== '.' && $filename !== '..')
            {
                $entries[] = $filename;
            }
        }
        // Release the handle before descending so that only one directory
        // handle is open at a time, however deep the tree is.
        closedir($dh);

        foreach ($entries as $entry)
        {
            $path = $dir . DIRECTORY_SEPARATOR . $entry;

            // NOTE(review): is_dir() follows symbolic links, so a link cycle
            // inside the scanned tree would recurse without bound.
            if (is_dir($path))
            {
                $this->extract($path, $targetFile, false, $ignoreEmails);
                continue;
            }

            if (!is_file($path) || filesize($path) >= 10000)
            {
                continue;
            }

            $content = @file_get_contents($path);
            if ($content === false || $content === '')
            {
                // Unreadable or empty file: nothing to scan.
                continue;
            }

            $matches = array();
            if (!preg_match_all($regex, $content, $matches))
            {
                continue;
            }

            foreach ($matches[0] as $email)
            {
                // Avoid storing duplicates (and addresses the caller excluded).
                $lc = strtolower($email);
                if (isset($ignoreEmails[$lc]))
                {
                    continue;
                }

                echo $lc . "\n";

                $append = file_put_contents($targetFile, $lc . "\n", FILE_APPEND);
                if (!$append)
                {
                    // false (cannot open) and 0 bytes written are both failures here.
                    echo "\n ERROR: Could not write to [outputfile]\n";
                    exit;
                }

                $ignoreEmails[$lc] = 1;
            }
        }
    }
}



/*
 * Command-line entry point.
 *
 * Expects two arguments: the folder to scan and the file to write the
 * addresses to. With fewer arguments the usage text is printed and the
 * script stops. If the first argument is not a directory, an error is
 * printed and the script stops with a non-zero exit status instead of
 * going on to scan an invalid path.
 */
if (!isset($argv[1]) || !isset($argv[2]))
{
    echo "\n";
    echo "******************************************************\n";
    echo " PHP Mail Extractor v1.0\n";
    echo "******************************************************\n";
    echo "\n";
    echo " Scan your disk/folder recursivly and stores \n";
    echo " all found email-addresses to a file.\n";
    echo "\n";
    echo " Only files less than 10Kb is scanned.\n";
    echo "\n";
    echo " Target file will always be flushed at start.\n";
    echo "\n";
    echo " How to use:\n";
    echo " php-email-extractor.exe [start-folder] [outputfile]\n";
    echo "\n";
    echo " Example:\n";
    echo " php-email-extractor.exe \"c:\\myfolder\" \"c:\\mail-list.txt\"\n";
    echo "\n";
    echo "\n";
    echo "\n DISCLAIMER \n";
    echo " This software is free, but used with no warranties\n";
    echo " The developer can not be hold responsible for any damage,\n";
    echo " directly or indirectly for use of this software.\n";
    echo "\n";
    echo "\n";
    echo " By Vidar Vestnes - April 2013\n";
    echo "\n";
    exit;
}
else if (!is_dir($argv[1]))
{
    echo " [start-folder] is not an accessable folder";
    // Stop here: there is nothing to scan, and a non-zero status lets
    // calling scripts detect the failure.
    exit(1);
}

// Shared by reference across the whole recursion to suppress duplicates.
$arr = array();
$extractor = new PhpMailExtractor();
// Third argument true: start from an empty target file.
$extractor->extract($argv[1], $argv[2], true, $arr);