Monthly Archives: July 2011

Using node.js to store PHP sessions

We use sessions when we want to preserve certain data across subsequent accesses. PHP allows us to use different handlers when we’re using sessions. The default one is filesystem, but we can change it with session.save_handler in the php.ini. session.save_handler defines the name of the handler which is used for storing and retrieving data associated with a session. We also can create our own handler to manage sessions. In this post we’re going to create a custom handler to store sessions in a node.js service. Let’s start:

Imagine we’ve got the following php script:

session_start();

if (!isset($_SESSION["gonzalo"])) $_SESSION["gonzalo"] = 0;
$_SESSION["gonzalo"]++;
$_SESSION["arr"] = array('key' => uniqid());
var_dump($_SESSION);

A simple usage of sessions with PHP. If we reload the page our counter will be incremented by one. We’re using the default session handler. It works without any problem.

The idea is create a custom handler to use a server with node.js to store the session information instead of filesystem. To create custom handlers we need to use the PHP function: session_set_save_handler and rewrite the callbacks for: open, close, read, write, destroy and gc. PHP’s documentation is great. My proposal is the following one:

Our custom handler:

class NodeSession
{
    const NODE_DEF_HOST = '127.0.0.1';
    const NODE_DEF_PORT = 5672;

    static function start($host = self::NODE_DEF_HOST, $port = self::NODE_DEF_PORT)
    {
        $obj = new self($host, $port);
        session_set_save_handler(
            array($obj, "open"),
            array($obj, "close"),
            array($obj, "read"),
            array($obj, "write"),
            array($obj, "destroy"),
            array($obj, "gc"));
        session_start();
        return $obj;
    }

    private function unserializeSession($data)
    {
        if(  strlen( $data) == 0) {
            return array();
        }

        // match all the session keys and offsets
        preg_match_all('/(^|;|\})([a-zA-Z0-9_]+)\|/i', $data, $matchesarray, PREG_OFFSET_CAPTURE);
        $returnArray = array();

        $lastOffset = null;
        $currentKey = '';
        foreach ( $matchesarray[2] as $value ) {
            $offset = $value[1];
            if(!is_null( $lastOffset)) {
                $valueText = substr($data, $lastOffset, $offset - $lastOffset );
                $returnArray[$currentKey] = unserialize($valueText);
            }
            $currentKey = $value[0];

            $lastOffset = $offset + strlen( $currentKey )+1;
        }

        $valueText = substr($data, $lastOffset );
        $returnArray[$currentKey] = unserialize($valueText);

        return $returnArray;
    }
    
    function __construct($host = self::NODE_DEF_HOST, $port = self::NODE_DEF_PORT)
    {
        $this->_host = $host;
        $this->_port = $port;
    }

    function open($save_path, $session_name)
    {
        return true;
    }

    function close()
    {
        return true;
    }

    public function read($id)
    {
        return (string) $this->send(__FUNCTION__, array('id' => $id));
    }

    public function write($id, $data)
    {
        try {
            $this->send(__FUNCTION__, array(
                'id'       => $id,
                'data'     => $data,
                'time'     => time(),
                'dataJSON' => json_encode($this->unserializeSession($data))));
            return true;
        } catch (Exception $e) {
            return false;
        }
    }

    public function destroy($id)
    {
        try {
            $this->send(__FUNCTION__, array('id' => $id));
        } catch (Exception $e) {
            return false;
        }
         return true;
    }

    function gc($maxlifetime)
    {
        try {
            $this->send(__FUNCTION__, array('maxlifetime' => $maxlifetime, 'time' => time()));
        } catch (Exception $e) {
            return false;
        }
        return true;
    }

    private function send($action, $params)
    {
        $params = array('action' => $action) + $params;
        return file_get_contents("http://{$this->_host}:{$this->_port}?" . http_build_query($params));
    }
}

Our node.js server:

var http = require('http'),
    url  = require('url'),
    session = require('nodePhpSessions').SessionHandler;

var sessionHandler = new session();

var server = http.createServer(function (req, res) {
    var parsedUrl = url.parse(req.url, true).query;
    res.writeHead(200, {'Content-Type': 'text/plain'});
    res.end(sessionHandler.run(parsedUrl));
});

server.listen(5672, "127.0.0.1", function() {
  var address = server.address();
  console.log("opened server on %j", address);
});

As we can see we need the node.js module nodePhpSessions. You can easily install with:

npm install nodePhpSessions

You can see nodePhpSessions library here.

The library is tested with nodeunit. Without TDD is very hard to test things such as garbage collector.:

var session = require('nodePhpSessions').SessionHandler;
var sessionHandler = new session();
var parsedUrl;

exports["testReadUndefinedSession"] = function(test){
    parsedUrl = { action: 'read', id: 'ts49vmf0p732iafr25mdu8gvg2' };
    test.equal(sessionHandler.run(parsedUrl), undefined);
    test.done();
};

exports["oneSessionShouldReturns1"] = function(test){
    parsedUrl = {
        action: 'write',
        id: 'ts49vmf0p732iafr25mdu8gvg2',
        data: 'gonzalo|i:1;arr|a:1:{s:3:"key";s:13:"4e2b1a40d136a";}',
        time: '1311447616',
        dataJSON: '{"gonzalo":1,"arr":{"key":"4e2b1a40d136a"}}' };
    sessionHandler.run(parsedUrl);

    parsedUrl = { action: 'readAsArray', id: 'ts49vmf0p732iafr25mdu8gvg2' };
    test.equal(sessionHandler.run(parsedUrl).gonzalo, 1);
    test.done();
};

exports["oneSessionShouldReturns2"] = function(test){
    parsedUrl = {
        action: 'write',
        id: 'ts49vmf0p732iafr25mdu8gvg2',
        data: 'gonzalo|i:2;arr|a:1:{s:3:"key";s:13:"4e2b1a40d136a";}',
        time: '1311447616',
        dataJSON: '{"gonzalo":2,"arr":{"key":"4e2b1a40d136a"}}' };
    sessionHandler.run(parsedUrl);
    parsedUrl = { action: 'readAsArray', id: 'ts49vmf0p732iafr25mdu8gvg2' };
    test.equal(sessionHandler.run(parsedUrl).gonzalo, 2);
    test.done();
};

exports["destroySession"] = function(test){
    parsedUrl = {
        action: 'destroy',
        id: 'ts49vmf0p732iafr25mdu8gvg2'};
    sessionHandler.run(parsedUrl);

    parsedUrl = { action: 'readAsArray', id: 'ts49vmf0p732iafr25mdu8gvg2' };
    test.equal(sessionHandler.run(parsedUrl), undefined);

	test.done();
};

exports["garbageColector"] = function(test){
    parsedUrl = {
        action: 'write',
        id: 'session1',
        data: 'gonzalo|i:1;arr|a:1:{s:3:"key";s:13:"4e2b1a40d136a";}',
        time: '1111111200',
        dataJSON: '{"gonzalo":1,"arr":{"key":"4e2b1a40d136a"}}' };
    sessionHandler.run(parsedUrl);

    parsedUrl = {
        action: 'write',
        id: 'session2',
        data: 'gonzalo|i:1;arr|a:1:{s:3:"key";s:13:"4e2b1a40d136a";}',
        time: '1111111100',
        dataJSON: '{"gonzalo":1,"arr":{"key":"4e2b1a40d136a"}}' };
    sessionHandler.run(parsedUrl);

    parsedUrl = { action: 'gc', maxlifetime: '100', time: '1111111210'};
    sessionHandler.run(parsedUrl);

    parsedUrl = { action: 'readAsArray', id: 'session2' };
    test.equal(sessionHandler.run(parsedUrl), undefined);

    parsedUrl = { action: 'readAsArray', id: 'session1' };
    test.equal(sessionHandler.run(parsedUrl).gonzalo, 1);

    test.done();
};

Here you can see the output of the tests:

nodeunit testNodeSessions.js 

testNodeSessions.js
✔ testReadUndefinedSession
✔ oneSessionShouldReturns1
✔ oneSessionShouldReturns2
✔ destroySession
✔ garbageColector

OK: 6 assertions (5ms)

Now we change the original PHP script to:

include_once 'NodeSessions.php';
NodeSession::start();

if (!isset($_SESSION["gonzalo"])) $_SESSION["gonzalo"] = 0;
$_SESSION["gonzalo"]++;
$_SESSION["arr"] = array('key' => uniqid());
var_dump($_SESSION);

We start the node.js server:

node serverSessions.js 

Now if we reload our script in the browser we will see the same behaviour, but now our sessions are stored in the node.js server.

array(2) {
  ["gonzalo"]=>
  int(16)
  ["arr"]=>
  array(1) {
    ["key"]=>
    string(13) "4e2a9f6a966f4"
  }
}

This kind of techniques are good when clustering PHP applications.

Full code is available on github (node server, PHP handler, tests and examples) here.

Populating datagrid techniques with PHP

Today I want to speak about populating datagrid techniques with PHP. At least in my daily work datagrids and tabular data are very common, because of that I want to show two different techniques when populating datagrids with data from our database. Maybe it’s obvious, but I want to show the differences. Let’s start.

Imagine we need to fetch data from our database and show it in a datagrid. Let’s do the traditional way. I haven’t use any framework for this example. Just old school spaghetti code.

$dbh = new PDO('pgsql:dbname=mydb;host=localhost', 'gonzalo', 'password');
$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$stmt = $dbh->prepare('SELECT * FROM test.tbl1 limit 10');
$stmt->setFetchMode(PDO::FETCH_ASSOC);
$stmt->execute();

$data = $stmt->fetchAll();

$table = "";
$table.= "<table>";
foreach ($data as $ow) {
    $table.= "<tr>";
        foreach ($ow as $item) {
            $table.= "<td>{$item}</td>";
        }
    $table.= "</tr>";
}
$table.= "</table>";
?>
<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8" />
        <title>inline grid</title>
    </head>
    <h1>inline grid</h1>
    <body>
        <?php echo $table; ?>
        <script type="text/javascript" src=" https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js"></script>
    </body>
</html>

And that’s all. The code works, and we’ve got or ugly datagrid with the data from our DB. Where’s the problem? If our “SELECT” statement is fast enougth and our connection to the database is good too, the page load will be good, indeed. But what happens if or query is slow (or we even have more than one)? The whole page load will be penalized due to our slow query. The user won’t see anything until our server finish with all the work. That means bad user experience. The alternative is load the page first (without populated datagrid, of course) and when it’s ready, we load with ajax the data from the server (JSON) and we populate the datagrid with javaScript.

Page without the populated datagrid:

<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8" />
        <title>grid ajax</title>
    </head>
    <h1>grid ajax</h1>
    <body>
        <table id='grid'></table>

        <script type="text/javascript" src=" https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js"></script>
        <script type="text/javascript">
        $(function(){
            $.getJSON("json.php", function(json){
                        for (var i=0;i<json.length;i++) {
                            $('#grid').append("<tr><td>" + json[i].id + "</td><td>" + json[i].field1 + "</td></tr>")
                        }
                    });
        });
        </script>
    </body>
</html>

JSON data fron the server:

$dbh = new PDO('pgsql:dbname=mydb;host=localhost', 'gonzalo', 'password');
$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$stmt = $dbh->prepare('SELECT * FROM test.tbl1 limit 10');
$stmt->setFetchMode(PDO::FETCH_ASSOC);
$stmt->execute();

$data = $stmt->fetchAll();

header('Cache-Control: no-cache, must-revalidate');
header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
header('Content-type: application/json');

echo json_encode($data);

The outcome of this second technique is the same than the first one, but now user will see the page faster than the first technique and data will load later. Probably the total time to finish all the work is better in the classical approach, but the UX will be better with the second one. Here you can see the times taken from Chorme’s network window:

Even though the total time is better in the inline grid example: 156ms vs 248ms, 1 http request vs 3 HTTP request. The user will see the page (without data) faster with the grid data example.

What’s better. As always it depends on our needs. We need to balance and choose the one that fits with your requirements.

Probably the JavaScript code that I use to populate the datagrid in the second technique could be written in a more efficient way (there’s also plugins to do it). I only want to show the indirect way to create HTML to improve the user experience of our applications.

Database abstraction layers in PHP. PDO versus DBAL

I normally use PDO in my PHP projects. I like it because it’s a PHP extension easy to use and shares the same interface between all databases. Normally I use PostgreSQL but if I change to mySql or Oracle I don’t need to use different functions to handle the database connections.

PHP has a great project called Doctrine2. Doctrine2 is a ORM and it uses its own database abstraction layer called DBAL. In fact DBAL isn’t a pure database abstraction layer. It’s built over PDO. It’s a set of PHP classes we can use that gives us features not available with ‘pure’ PDO. If we use Doctrine2 we’re using DBAL behind the scene, but we don’t need to use Doctrine2 to use DBAL. We can use DBAL as a database abstraction layer without any ORM. Obiously this extra PHP layer over our PDO extension needs to pay a fee. I will have a look to this fee in this post. I will take one of my old post about PDO and I will do the same with DBAL to see the performance differences. Let’s start:

The PDO version:

error_reporting(-1);
$time = microtime(TRUE);
$mem = memory_get_usage();

$dbh = new PDO('pgsql:dbname=mydb;host=localhost', 'gonzalo', 'password');
$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

$dbh->beginTransaction();

$smtp = $dbh->prepare('INSERT INTO test.tbl1 (id, field1) values (:ID, :FIELD1)');

for ($i=0; $i<1000; $i++) {
    $smtp->execute(array('ID' => $i, 'FIELD1' => "field {$i}"));
}

$dbh->commit();

$stmt = $dbh->prepare('SELECT * FROM test.tbl1 limit 10000');
$stmt->execute();

$i=0;
while ($row = $stmt->fetch()) {
	$i++;
}
echo '<h1>PDO</h1>';
echo "<strong>{$i} </strong>";

print_r(array('memory' => (memory_get_usage() - $mem) / (1024 * 1024), 'seconds' => microtime(TRUE) - $time));

$dbh->beginTransaction();
$smtp = $dbh->prepare('delete from test.tbl1');
$smtp->execute();
$dbh->commit();

The DBAL version:

error_reporting(-1);
$time = microtime(TRUE);
$mem = memory_get_usage();

use Doctrine\DBAL\DriverManager;

$connectionParams = array(
    'dbname'   => 'mydb',
    'user'     => 'gonzalo',
    'password' => 'password',
    'host'     => 'localhost',
    'driver'   => 'pdo_pgsql',
    );

$dbh = DriverManager::getConnection($connectionParams);

$dbh->beginTransaction();

$smtp = $dbh->prepare('INSERT INTO test.tbl1 (id, field1) values (:ID, :FIELD1)');

for ($i=0; $i<1000; $i++) {
    $smtp->execute(array('ID' => $i, 'FIELD1' => "field {$i}"));
}

$dbh->commit();

$stmt = $dbh->prepare('SELECT * FROM test.tbl1 limit 10000');
$stmt->execute();

$i=0;
while ($row = $stmt->fetch()) {
	$i++;
}
echo '<h1>DBAL</h1>';
echo "<strong>{$i} </strong>";

print_r(array('memory' => (memory_get_usage() - $mem) / (1024 * 1024), 'seconds' => microtime(TRUE) - $time));

As we can see DBAL is slower than pure PDO (obiously). Anyway the most of the extra time of DBAL is the time we need to include php classes (remember PDO is a PHP extension and we dont need to include any file). If we take times excluding the include time, the memory usage is almost the same and the execution time a little slower.

Autoload for DBAL version:

spl_autoload_register(function ($class) {
        $class = str_replace('\\', '/', $class) . '.php';
        require_once($class);
    }
);

or hardcoded includes for this example

require_once('Doctrine/DBAL/Driver.php');
require_once('Doctrine/DBAL/Driver/Connection.php');
require_once('Doctrine/DBAL/Platforms/AbstractPlatform.php');
require_once('Doctrine/DBAL/Driver/Statement.php');

require_once('Doctrine/DBAL/DriverManager.php');
require_once('Doctrine/DBAL/Configuration.php');
require_once('Doctrine/Common/EventManager.php');
require_once('Doctrine/DBAL/Driver/PDOPgSql/Driver.php');
require_once('Doctrine/DBAL/Driver.php');
require_once('Doctrine/DBAL/Connection.php');
require_once('Doctrine/DBAL/Driver/Connection.php');
require_once('Doctrine/DBAL/Query/Expression/ExpressionBuilder.php');
require_once('Doctrine/DBAL/Platforms/PostgreSqlPlatform.php');
require_once('Doctrine/DBAL/Platforms/AbstractPlatform.php');
require_once('Doctrine/DBAL/Driver/PDOConnection.php');
require_once('Doctrine/DBAL/Driver/PDOStatement.php');
require_once('Doctrine/DBAL/Driver/Statement.php');
require_once('Doctrine/DBAL/Events.php');
require_once('Doctrine/DBAL/Statement.php');

Outcomes of the tests:

With pure PDO:

  • memory: 0.0044288635253906
  • seconds: 0.24748301506042

With DBAL and autoload:

  • memory: 0.97610473632812
  • seconds: 0.29042816162109

With DBAL and hardcoded requires:

  • memory: 0.97521591186523
  • seconds: 0.31192088127136

With DBAL bypassing the include part:

  • memory: 0.0099525451660156
  • seconds: 0.30333304405212

The fee we paid for using DBAL gives us some extra features. OK we don’t need DBAL to get those features. If we code a bit we can get them (remember DBAL is nothing but a PHP extra layer). But DBAL has a great interface a well documented. Now I’m going to list a few extra features from DBAL very interesting, at least for me:

Transactional mode

I really like it. It allows us to create scripts like that:

$dbh->transactional(function($conn) {
    $smtp = $conn->prepare('INSERT INTO wf.tbl1 (id, field1) values (:ID, :FIELD1)');

    for ($i=0; $i<1000; $i++) {
        $smtp->execute(array('ID' => $i, 'FIELD1' => "field {$i}"));
    }
});

A simple closure will make the code more concise and it will commit/rollback our transaction for us. In fact I borrowed this function in my PDO projects to use this interface. I love Open source.

Snippet from DBAL library:

    /**
     * Executes a function in a transaction.
     *
     * The function gets passed this Connection instance as an (optional) parameter.
     *
     * If an exception occurs during execution of the function or transaction commit,
     * the transaction is rolled back and the exception re-thrown.
     *
     * @param Closure $func The function to execute transactionally.
     */
    public function transactional(Closure $func)
    {
        $this->beginTransaction();
        try {
            $func($this);
            $this->commit();
        } catch (Exception $e) {
            $this->rollback();
            throw $e;
        }
    }

Types conversion

Really useful, at least for when I work with dates:

$date = new \DateTime("2011-03-05 14:00:21");
$stmt = $conn->prepare("SELECT * FROM articles WHERE publish_date > ?");
$stmt->bindValue(1, $date, "datetime");
$stmt->execute();

List of Parameters Conversion

It’s a cool feature too available in DBAL since Doctrine 2.1

$dbh->executeQuery('SELECT * FROM wf.tbl1 WHERE id IN (?)',
    array(array(1, 2, 3, 4, 5, 6)),
    array(\Doctrine\DBAL\Connection::PARAM_INT_ARRAY));

Bind parameters with IN clause with PDO is a bit ugly. We need to create a series of bind parameters depending on our list to map them within the SQL. It’s possible but DBAL interface is smarter.

Transaction Nesting

Another cool feature:

$dbh->beginTransaction();
try {
    $dbh->beginTransaction();
    try {
        $smtp = $dbh->prepare('INSERT INTO wf.tbl1 (id, field1) values (:ID, :FIELD1)');

        for ($i=0; $i<1000; $i++) {
            $smtp->execute(array('ID' => $i, 'FIELD1' => "field {$i}"));
        }

        } catch (Exception $e) {
            $dbh->rollback(); //transaction marked for rollback only
            throw $e;
        }
    $smtp = $dbh->prepare('INSERT INTO wf.tbl1 (id, field1) values (:ID, :FIELD1)');

    for ($i=0; $i<1000; $i++) {
        $smtp->execute(array('ID' => $i, 'FIELD1' => "field {$i}"));
    }

    $dbh->commit(); // real transaction committed
} catch (Exception $e) {
    $dbh->rollback(); // transaction rollback
    throw $e;
}

This piece of code with PDO will throw the following error:
There is already an active transaction
but it works with DBAL. If we need to do this kind of things with PDO we need to use savepoints and things like that. DBAL does the ugly part for us.

New features in PHP5.4 alpha1

We already have the php5.4 alpha ready for testing. This new release has a few features that I really like. The lack of those features gave me problems in the past. Because of that, I’m very excited waiting for the new stable release of PHP 5.4.
Now I’m going to list the new features I like. There’re more features (check the full list here) but those ones are the ones I’m really waiting for:

Added array dereferencing support

When we’re doing OO we can do things like that:

class A
{
    public function foo()
    {
        return new B();
    }
}

class B
{
    public function bar()
    {
        return "Hi";
    }
}

$a = new A();
echo $a->foo()->bar(); // Will output Hi

But if we output an array instead of an instance of class, we will need an extra variable to use it:

class A
{
    public function foo()
    {
        return array('name' => 'Gonzalo');
    }
}

$a = new A();
$foo = $a->foo();
echo $foo['name'];

Now with PHP 5.4 we’ll use:

$a = new A();
echo $a->foo()['name'];

Cool, isn’t it?

Added indirect method call through array

Now an array(‘class’, ‘method’) can be used to call a function. is_callable will return true.

That means we can do:

class Foo {
   public function bar($name) {
      echo "Hi, $name";
   }
}

$f = array('Foo','bar');
echo $f('Gonzalo'); // Hi, Gonzalo

More information here

Added closure $this support back

When we use closures in PHP5.3 and we want to use $this statement, we need to do some strange hacks to use it in the context of the callback. Things like $that = $this; and use($that). It works, but looks like an ugly hack. Now we can do:

class A {
  private $value = 1;
  function single_getter($name) {
    return function() use ($name) {
      return $this->$name;
    };
  }
}
class B {
  private $value = 2;
  function test() {  
    $a = new A;
    $single_getter = $a->single_getter('value');
    print $single_getter();
  }
}  

$b = new B;
$b->test();

PHP5.3 -> Fatal error: Using $this when not in object context
PHP5.4 -> the script will output “1”

More information here.

Added support for Traits

And finally we will enjoy of traits:
Here we can read a really good article about it:

With Traits we can share interfaces and avoid inheritance nightmares to reuse code. Clean and simple.

Conclusions

In my handle opinion those new features will help PHP to grow a little bit more and adapt itself to new years. It’s a pity we still don’t have a short way to define arrays and we still need to use array(1, 2, 3) instead of doing things like [1, 2, 3], but I hope this new feature will be available in future versions I’m not sure but I think it’s on the roadmap.

Follow

Get every new post delivered to your Inbox.

Join 989 other followers